
HADOOP-7106. Reorganize SVN layout to combine HDFS, Common, and MR in a single tree (project unsplit)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-279@1134994 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon, 14 years ago
Commit
7dc7c7a7dd
100 changed files with 21,830 additions and 0 deletions
  1. 44 0
      common/.gitignore
  2. 10635 0
      common/CHANGES.txt
  3. 244 0
      common/LICENSE.txt
  4. 2 0
      common/NOTICE.txt
  5. 31 0
      common/README.txt
  6. 123 0
      common/bin/hadoop
  7. 331 0
      common/bin/hadoop-config.sh
  8. 167 0
      common/bin/hadoop-daemon.sh
  9. 34 0
      common/bin/hadoop-daemons.sh
  10. 99 0
      common/bin/rcc
  11. 65 0
      common/bin/slaves.sh
  12. 36 0
      common/bin/start-all.sh
  13. 37 0
      common/bin/stop-all.sh
  14. 1769 0
      common/build.xml
  15. 24 0
      common/conf/configuration.xsl
  16. 8 0
      common/conf/core-site.xml.template
  17. 54 0
      common/conf/hadoop-env.sh.template
  18. 72 0
      common/conf/hadoop-metrics.properties
  19. 16 0
      common/conf/hadoop-metrics2.properties.example
  20. 106 0
      common/conf/hadoop-policy.xml.template
  21. 149 0
      common/conf/log4j.properties
  22. 1 0
      common/conf/masters.template
  23. 1 0
      common/conf/slaves.template
  24. 57 0
      common/conf/ssl-client.xml.example
  25. 55 0
      common/conf/ssl-server.xml.example
  26. 261 0
      common/ivy.xml
  27. 42 0
      common/ivy/hadoop-common-instrumented-template.xml
  28. 151 0
      common/ivy/hadoop-common-template.xml
  29. 43 0
      common/ivy/hadoop-common-test-template.xml
  30. 50 0
      common/ivy/ivysettings.xml
  31. 62 0
      common/ivy/libraries.properties
  32. 11 0
      common/lib/jdiff/hadoop-core_0.20.0.xml
  33. 11 0
      common/lib/jdiff/hadoop-core_0.21.0.xml
  34. 11 0
      common/lib/jdiff/hadoop_0.17.0.xml
  35. 11 0
      common/lib/jdiff/hadoop_0.18.1.xml
  36. 11 0
      common/lib/jdiff/hadoop_0.18.2.xml
  37. 11 0
      common/lib/jdiff/hadoop_0.18.3.xml
  38. 11 0
      common/lib/jdiff/hadoop_0.19.0.xml
  39. 11 0
      common/lib/jdiff/hadoop_0.19.1.xml
  40. 11 0
      common/lib/jdiff/hadoop_0.19.2.xml
  41. 11 0
      common/lib/jdiff/hadoop_0.20.0.xml
  42. 11 0
      common/lib/jdiff/hadoop_0.20.1.xml
  43. 11 0
      common/lib/jdiff/hadoop_0.20.2.xml
  44. 11 0
      common/src/contrib/bash-tab-completion/README
  45. 121 0
      common/src/contrib/bash-tab-completion/hadoop.sh
  46. 305 0
      common/src/contrib/build-contrib.xml
  47. 64 0
      common/src/contrib/build.xml
  48. 15 0
      common/src/contrib/ec2/README.txt
  49. 71 0
      common/src/contrib/ec2/bin/cmd-hadoop-cluster
  50. 80 0
      common/src/contrib/ec2/bin/create-hadoop-image
  51. 60 0
      common/src/contrib/ec2/bin/delete-hadoop-cluster
  52. 65 0
      common/src/contrib/ec2/bin/hadoop-ec2
  53. 93 0
      common/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
  54. 171 0
      common/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh
  55. 80 0
      common/src/contrib/ec2/bin/image/create-hadoop-image-remote
  56. 63 0
      common/src/contrib/ec2/bin/image/ec2-run-user-data
  57. 42 0
      common/src/contrib/ec2/bin/launch-hadoop-cluster
  58. 119 0
      common/src/contrib/ec2/bin/launch-hadoop-master
  59. 59 0
      common/src/contrib/ec2/bin/launch-hadoop-slaves
  60. 33 0
      common/src/contrib/ec2/bin/list-hadoop-clusters
  61. 48 0
      common/src/contrib/ec2/bin/terminate-hadoop-cluster
  62. 97 0
      common/src/contrib/failmon/README
  63. 54 0
      common/src/contrib/failmon/bin/failmon.sh
  64. 235 0
      common/src/contrib/failmon/bin/scheduler.py
  65. 120 0
      common/src/contrib/failmon/build.xml
  66. 25 0
      common/src/contrib/failmon/conf/commons-logging.properties
  67. 80 0
      common/src/contrib/failmon/conf/failmon.properties
  68. 39 0
      common/src/contrib/failmon/conf/global.config
  69. 10 0
      common/src/contrib/failmon/conf/hosts.list
  70. 40 0
      common/src/contrib/failmon/conf/log4j.properties
  71. 52 0
      common/src/contrib/failmon/ivy.xml
  72. 17 0
      common/src/contrib/failmon/ivy/libraries.properties
  73. 154 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Anonymizer.java
  74. 101 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/CPUParser.java
  75. 41 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Continuous.java
  76. 486 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Environment.java
  77. 151 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/EventRecord.java
  78. 120 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Executor.java
  79. 154 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HDFSMerger.java
  80. 136 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HadoopLogParser.java
  81. 282 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LocalStore.java
  82. 214 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LogParser.java
  83. 43 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/MonitorJob.java
  84. 53 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Monitored.java
  85. 140 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/NICParser.java
  86. 132 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/OfflineAnonymizer.java
  87. 163 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/PersistentState.java
  88. 120 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/RunOnce.java
  89. 206 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SMARTParser.java
  90. 112 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SensorsParser.java
  91. 163 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SerializedRecord.java
  92. 102 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/ShellParser.java
  93. 126 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SystemLogParser.java
  94. 272 0
      common/src/contrib/hod/CHANGES.txt
  95. 104 0
      common/src/contrib/hod/README
  96. 1 0
      common/src/contrib/hod/bin/VERSION
  97. 31 0
      common/src/contrib/hod/bin/checknodes
  98. 580 0
      common/src/contrib/hod/bin/hod
  99. 183 0
      common/src/contrib/hod/bin/hodcleanup
  100. 290 0
      common/src/contrib/hod/bin/hodring

+ 44 - 0
common/.gitignore

@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*~
+.classpath
+.project
+.settings
+.svn
+build/
+build-fi/
+build.properties
+conf/masters
+conf/slaves
+conf/hadoop-env.sh
+conf/hadoop-site.xml
+conf/core-site.xml
+conf/mapred-site.xml
+conf/hdfs-site.xml
+conf/hadoop-policy.xml
+conf/capacity-scheduler.xml
+conf/mapred-queue-acls.xml
+docs/api/
+ivy/hadoop-core.xml
+ivy/hadoop-core-test.xml
+ivy/ivy-*.jar
+ivy/maven-ant-tasks-*.jar
+logs/
+src/contrib/ec2/bin/hadoop-ec2-env.sh
+src/docs/build
+src/docs/cn/build
+src/docs/cn/src/documentation/sitemap.xmap
+src/docs/cn/uming.conf

+ 10635 - 0
common/CHANGES.txt

@@ -0,0 +1,10635 @@
+Hadoop Change Log
+
+Trunk (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+   HADOOP-6904. Support method based RPC compatibility. (hairong)
+
+  NEW FEATURES
+
+    HADOOP-7342. Add a utility API in FileUtil for JDK File.list to
+    avoid NPEs on File.list() (Bharath Mundlapudi via mattf)
+
+    HADOOP-7322. Adding a util method in FileUtil for directory listing to
+    avoid NPEs on File.listFiles() (Bharath Mundlapudi via mattf)
+
+    HADOOP-6994. Api to get delegation token in AbstractFileSystem. (jitendra)
+
+    HADOOP-7171. Support UGI in FileContext API. (jitendra)
+
+    HADOOP-7257 Client side mount tables (sanjay)
+
+  IMPROVEMENTS
+
+    HADOOP-7133. Batch the calls in DataStorage to FileUtil.createHardLink().
+    (Matt Foley via jghoman)
+
+    HADOOP-7054. Change NN LoadGenerator to use FileContext APIs.
+    (Sanjay Radia)
+
+    HADOOP-7175. Add isEnabled() to Trash.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7180. Better support on CommandFormat on the API and exceptions.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7202. Improve shell Command base class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7224. Add CommandFactory to shell.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7230. Move "fs -help" shell command tests from HDFS to COMMON; see
+    also HDFS-1844.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7233. Refactor ls to conform to new FsCommand class.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7235. Refactor the tail command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7227. Remove protocol version check at proxy creation in Hadoop
+    RPC. (jitendra)
+
+    HADOOP-7236. Refactor the mkdir command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7114. FsShell should dump all exceptions at DEBUG level.
+    (todd via tomwhite)
+
+    HADOOP-7250. Refactor the setrep command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7249. Refactor the chmod/chown/chgrp command to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7251. Refactor the getmerge command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7265. Keep track of relative paths in PathData.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7238. Refactor the cat and text commands to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7271. Standardize shell command error messages.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7275. Refactor the stat command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7237. Refactor the touchz command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7267. Refactor the rm/rmr/expunge commands to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7285. Refactor the test command to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7289. In ivy.xml, test conf should not extend common conf.
+    (Eric Yang via szetszwo)
+
+    HADOOP-7286. Refactor the du/dus/df commands to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7320. Refactor the copy and move commands to conform to new
+    FsCommand class. (Daryn Sharp via todd)
+
+    HADOOP-7001.  Configuration changes can occur via the Reconfigurable
+    interface. (Patrick Kline via dhruba)
+
+    HADOOP-7331. Make hadoop-daemon.sh return exit code 1 if daemon processes
+    did not get started. (Tanping Wang via todd)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-7223. FileContext createFlag combinations are not clearly defined.
+    (suresh)
+
+    HADOOP-7215. RPC clients must use network interface corresponding to 
+    the host in the client's kerberos principal key. (suresh)
+
+    HADOOP-7268. FileContext.getLocalFSFileContext() behavior needs to be 
+    fixed w.r.t tokens. (jitendra)
+
+    HADOOP-7216. Add FsCommand.runAll() with deprecated annotation for the
+    transition of Command base class improvement.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7207. fs member of FSShell is not really needed (boryas)
+
+    HADOOP-7231. Fix synopsis for -count. (Daryn Sharp via eli).
+
+    HADOOP-6920, HADOOP-7292, HADOOP-7306. Porting bugfixes portion of the 
+    three patches to yahoo-merge branch.
+
+    HADOOP-7204. remove local unused fs variable from CmdHandler 
+    and FsShellPermissions.changePermissions (boryas)
+
+    HADOOP-7210. Chown command is not working from FSShell
+    (Uma Maheswara Rao G via todd)
+
+    HADOOP-7282. ipc.Server.getRemoteIp() may return null.  (John George
+    via szetszwo)
+
+    HADOOP-7336. TestFileContextResolveAfs will fail with default 
+    test.build.data property. (jitendra)
+
+    HADOOP-7284 Trash and shell's rm does not work for viewfs (Sanjay Radia)
+
+    HADOOP-7287. Configuration deprecation mechanism doesn't work properly for
+    GenericOptionsParser and Tools. (Aaron T. Myers via todd)
+
+Release 0.22.0 - Unreleased
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+    HADOOP-6791.  Refresh for proxy superuser config
+    (common part for HDFS-1096) (boryas)
+
+    HADOOP-6581. Add authenticated TokenIdentifiers to UGI so that 
+    they can be used for authorization (Kan Zhang and Jitendra Pandey 
+    via jghoman)
+
+    HADOOP-6584. Provide Kerberized SSL encryption for webservices.
+    (jghoman and Kan Zhang via jghoman)
+
+    HADOOP-6853. Common component of HDFS-1045. (jghoman)
+
+    HADOOP-6859 - Introduce additional statistics to FileSystem to track 
+    file system operations (suresh)
+
+    HADOOP-6870. Add a new API getFiles to FileSystem and FileContext that
+    lists all files under the input path or the subtree rooted at the
+    input path if recursive is true. Block locations are returned together
+    with each file's status. (hairong)
+
+    HADOOP-6888. Add a new FileSystem API closeAllForUGI(..) for closing all
+    file systems associated with a particular UGI.  (Devaraj Das and Kan Zhang
+    via szetszwo)
+
+    HADOOP-6892. Common component of HDFS-1150 (Verify datanodes' identities 
+    to clients in secure clusters) (jghoman)
+
+    HADOOP-6889. Make RPC to have an option to timeout. (hairong)
+
+    HADOOP-6996. Allow CodecFactory to return a codec object given a codec's
+    class name. (hairong)
+
+    HADOOP-7171. Support UGI in the FileContext. (jitendra)
+
+    HADOOP-6854. Options.createOpts should provide API to access Progress.
+    (Krishna Ramachandran via jitendra)
+
+    HADOOP-7013. Add boolean field isCorrupt to BlockLocation. 
+    (Patrick Kling via hairong)
+
+    HADOOP-6978. Adds support for NativeIO using JNI. 
+    (Todd Lipcon, Devaraj Das & Owen O'Malley via ddas)
+
+  IMPROVEMENTS
+
+    HADOOP-6644. util.Shell getGROUPS_FOR_USER_COMMAND method name 
+    should use common naming convention (boryas)
+
+    HADOOP-6778. add isRunning() method to 
+    AbstractDelegationTokenSecretManager (for HDFS-1044) (boryas)
+
+    HADOOP-6633. normalize property names for JT/NN kerberos principal 
+    names in configuration (boryas)
+
+    HADOOP-6627. "Bad Connection to FS" message in FSShell should print 
+    message from the exception (boryas)
+
+    HADOOP-6600. mechanism for authorization check for inter-server 
+    protocols. (boryas)
+
+    HADOOP-6623. Add StringUtils.split for non-escaped single-character
+    separator. (Todd Lipcon via tomwhite)
+
+    HADOOP-6761. The Trash Emptier has the ability to run more frequently.
+    (Dmytro Molkov via dhruba)
+
+    HADOOP-6714. Resolve compressed files using CodecFactory in FsShell::text.
+    (Patrick Angeles via cdouglas)
+
+    HADOOP-6661. User document for UserGroupInformation.doAs. 
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6674. Makes use of the SASL authentication options in the
+    SASL RPC. (Jitendra Pandey via ddas)
+
+    HADOOP-6526. Need mapping from long principal names to local OS 
+    user names. (boryas)
+
+    HADOOP-6814. Adds an API in UserGroupInformation to get the real
+    authentication method of a passed UGI. (Jitendra Pandey via ddas)
+
+    HADOOP-6756. Documentation for common configuration keys.
+    (Erik Steffl via shv)
+
+    HADOOP-6835. Add support for concatenated gzip input. (Greg Roelofs via
+    cdouglas)
+
+    HADOOP-6845. Renames the TokenStorage class to Credentials. 
+    (Jitendra Pandey via ddas)
+
+    HADOOP-6826. FileStatus needs unit tests. (Rodrigo Schmidt via Eli
+    Collins)
+
+    HADOOP-6905. add buildDTServiceName method to SecurityUtil 
+    (as part of MAPREDUCE-1718)  (boryas)
+
+    HADOOP-6632. Adds support for using different keytabs for different
+    servers in a Hadoop cluster. In the earlier implementation, all servers
+    of a certain type (like TaskTracker), would have the same keytab and the
+    same principal. Now the principal name is a pattern that has _HOST in it.
+    (Kan Zhang & Jitendra Pandey via ddas)
+
+    HADOOP-6861. Adds new non-static methods in Credentials to read and 
+    write token storage file. (Jitendra Pandey & Owen O'Malley via ddas)
+
+    HADOOP-6877. Common part of HDFS-1178 (NameNode servlets should communicate
+    with NameNode directly). (Kan Zhang via jghoman)
+    
+    HADOOP-6475. Adding some javadoc to Server.RpcMetrics, UGI. 
+    (Jitendra Pandey and borya via jghoman)
+
+    HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs 
+    periodically. (Owen O'Malley and ddas via ddas)
+
+    HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong)
+
+    HADOOP-6862. Adds api to add/remove user and group to AccessControlList
+    (amareshwari)
+
+    HADOOP-6911. doc update for DelegationTokenFetcher (boryas)
+
+    HADOOP-6900. Make the iterator returned by FileSystem#listLocatedStatus to 
+    throw IOException rather than RuntimeException when there is an IO error
+    fetching the next file. (hairong)
+
+    HADOOP-6905. Better logging messages when a delegation token is invalid.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6693. Add metrics to track kerberos login activity. (suresh)
+
+    HADOOP-6803. Add native gzip read/write coverage to TestCodec.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6950. Suggest that HADOOP_CLASSPATH should be preserved in 
+    hadoop-env.sh.template. (Philip Zeyliger via Eli Collins)
+
+    HADOOP-6922. Make AccessControlList a writable and update documentation
+    for Job ACLs.  (Ravi Gummadi via vinodkv)
+
+    HADOOP-6965. Introduces checks for whether the original tgt is valid 
+    in the reloginFromKeytab method.
+
+    HADOOP-6856. Simplify constructors for SequenceFile, and MapFile. (omalley)
+
+    HADOOP-6987. Use JUnit Rule to optionally fail test cases that run more
+    than 10 seconds (jghoman)
+
+    HADOOP-7005. Update test-patch.sh to remove callback to Hudson. (nigel)
+
+    HADOOP-6985. Suggest that HADOOP_OPTS be preserved in
+    hadoop-env.sh.template. (Ramkumar Vadali via cutting)
+
+    HADOOP-7007. Update the hudson-test-patch ant target to work with the
+    latest test-patch.sh script (gkesavan)
+
+    HADOOP-7010. Typo in FileSystem.java. (Jingguo Yao via eli)
+
+    HADOOP-7009. MD5Hash provides a public factory method that creates an
+    instance of thread local MessageDigest. (hairong)
+
+    HADOOP-7008. Enable test-patch.sh to have a configured number of acceptable 
+    findbugs and javadoc warnings. (nigel and gkesavan)
+
+    HADOOP-6818. Provides a JNI implementation of group resolution. (ddas)
+
+    HADOOP-6943. The GroupMappingServiceProvider interface should be public.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-4675. Current Ganglia metrics implementation is incompatible with
+    Ganglia 3.1. (Brian Bockelman via tomwhite)
+
+    HADOOP-6977. Herriot daemon clients should vend statistics (cos)
+
+    HADOOP-7024. Create a test method for adding file systems during tests.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6903. Make AbstractFileSystem methods and some FileContext methods public. (Sanjay Radia via Sanjay Radia)
+
+    HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash normalization. (eli)
+
+    HADOOP-7032. Assert type constraints in the FileStatus constructor. (eli)
+
+    HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper. (eli)
+
+    HADOOP-7028. ant eclipse does not include requisite ant.jar in the 
+    classpath. (Patrick Angeles via eli)
+
+    HADOOP-6864. Provide a JNI-based implementation of ShellBasedUnixGroupsNetgroupMapping 
+    (implementation of GroupMappingServiceProvider) (Erik Steffl via boryas)
+
+    HADOOP-7187. Fix socket leak in GangliaContext.  (Uma Maheswara Rao G
+    via szetszwo)
+
+    HADOOP-7241. fix typo of command 'hadoop fs -help tail'. 
+    (Wei Yongjun via eli)
+
+  OPTIMIZATIONS
+
+    HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..).
+    (Erik Steffl via szetszwo)
+
+    HADOOP-6683. ZlibCompressor does not fully utilize the buffer.
+    (Kang Xiao via eli)
+
+  BUG FIXES
+
+    HADOOP-6638. try to relogin in a case of failed RPC connection (expired 
+    tgt) only in case the subject is loginUser or proxyUgi.realUser. (boryas)
+
+    HADOOP-6781. security audit log shouldn't have exception in it. (boryas)
+
+    HADOOP-6612.  Protocols RefreshUserToGroupMappingsProtocol and 
+    RefreshAuthorizationPolicyProtocol will fail with security enabled (boryas)
+
+    HADOOP-6764. Remove verbose logging from the Groups class. (Boris Shkolnik)
+
+    HADOOP-6730. Bug in FileContext#copy and provide base class for 
+    FileContext tests. (Ravi Phulari via jghoman)
+
+    HADOOP-6669. Respect compression configuration when creating DefaultCodec
+    instances. (Koji Noguchi via cdouglas)
+
+    HADOOP-6747. TestNetUtils fails on Mac OS X. (Todd Lipcon via jghoman)
+
+    HADOOP-6787. Factor out glob pattern code from FileContext and
+    Filesystem. Also fix bugs identified in HADOOP-6618 and make the
+    glob pattern code less restrictive and more POSIX standard
+    compliant. (Luke Lu via eli)
+
+    HADOOP-6649.  login object in UGI should be inside the subject (jnp via 
+    boryas)
+
+    HADOOP-6687.   user object in the subject in UGI should be reused in case 
+    of a relogin. (jnp via boryas)
+
+    HADOOP-6603. Provide workaround for issue with Kerberos not resolving 
+    cross-realm principal (Kan Zhang and Jitendra Pandey via jghoman)
+
+    HADOOP-6620. NPE if renewer is passed as null in getDelegationToken.
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6613. Moves the RPC version check ahead of the AuthMethod check.
+    (Kan Zhang via ddas)
+
+    HADOOP-6682. NetUtils:normalizeHostName does not process hostnames starting
+    with [a-f] correctly. (jghoman)
+
+    HADOOP-6652. Removes the unnecessary cache from 
+    ShellBasedUnixGroupsMapping. (ddas)
+
+    HADOOP-6815. refreshSuperUserGroupsConfiguration should use server side 
+    configuration for the refresh (boryas)
+
+    HADOOP-6648. Adds a check for null tokens in Credentials.addToken api.
+    (ddas)
+ 
+    HADOOP-6647. balancer fails with "is not authorized for protocol 
+    interface NamenodeProtocol" in secure environment (boryas)
+
+    HADOOP-6834. TFile.append compares initial key against null lastKey
+    (hong tang via mahadev)
+
+    HADOOP-6670. Use the UserGroupInformation's Subject as the criteria for
+    equals and hashCode. (Owen O'Malley and Kan Zhang via ddas)
+
+    HADOOP-6536. Fixes FileUtil.fullyDelete() not to delete the contents of
+    the sym-linked directory. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6873. using delegation token over hftp for long 
+    running clients (boryas)
+
+    HADOOP-6706. Improves the sasl failure handling due to expired tickets,
+    and other server detected failures. (Jitendra Pandey and ddas via ddas)
+
+    HADOOP-6715. Fixes AccessControlList.toString() to return a descriptive
+    String representation of the ACL. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6885. Fix java doc warnings in Groups and 
+    RefreshUserMappingsProtocol. (Eli Collins via jghoman) 
+
+    HADOOP-6482. GenericOptionsParser constructor that takes Options and 
+    String[] ignores options. (Eli Collins via jghoman)
+
+    HADOOP-6906.  FileContext copy() utility doesn't work with recursive
+    copying of directories. (vinod k v via mahadev)
+
+    HADOOP-6453. Hadoop wrapper script shouldn't ignore an existing 
+    JAVA_LIBRARY_PATH. (Chad Metcalf via jghoman)
+
+    HADOOP-6932.  Namenode start (init) fails because of invalid kerberos 
+    key, even when security set to "simple" (boryas)
+
+    HADOOP-6913. Circular initialization between UserGroupInformation and 
+    KerberosName (Kan Zhang via boryas)
+
+    HADOOP-6907. Rpc client doesn't use the per-connection conf to figure
+    out server's Kerberos principal (Kan Zhang via hairong)
+
+    HADOOP-6938. ConnectionId.getRemotePrincipal() should check if security
+    is enabled. (Kan Zhang via hairong)
+
+    HADOOP-6930. AvroRpcEngine doesn't work with generated Avro code. 
+    (sharad)
+
+    HADOOP-6940. RawLocalFileSystem's markSupported method misnamed 
+    markSupport. (Tom White via eli).
+
+    HADOOP-6951.  Distinct minicluster services (e.g. NN and JT) overwrite each
+    other's service policies.  (Aaron T. Myers via tomwhite)
+
+    HADOOP-6879. Provide SSH based (Jsch) remote execution API for system
+    tests (cos)
+
+    HADOOP-6989. Correct the parameter for SetFile to set the value type
+    for SetFile to be NullWritable instead of the key. (cdouglas via omalley)
+
+    HADOOP-6984. Combine the compress kind and the codec in the same option
+    for SequenceFiles. (cdouglas via omalley)
+
+    HADOOP-6933. TestListFiles is flaky. (Todd Lipcon via tomwhite)
+
+    HADOOP-6947.  Kerberos relogin should set refreshKrb5Config to true.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-7006. Fix 'fs -getmerge' command to not be a no-op.
+    (Chris Nauroth via cutting)
+
+    HADOOP-6663.  BlockDecompressorStream get EOF exception when decompressing
+    the file compressed from empty file.  (Kang Xiao via tomwhite)
+
+    HADOOP-6991.  Fix SequenceFile::Reader to honor file lengths and call
+    openFile (cdouglas via omalley)
+
+    HADOOP-7011.  Fix KerberosName.main() to not throw an NPE.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-6975.  Integer overflow in S3InputStream for blocks > 2GB.
+    (Patrick Kling via tomwhite)
+
+    HADOOP-6758. MapFile.fix does not allow index interval definition.
+    (Gianmarco De Francisci Morales via tomwhite)
+
+    HADOOP-6926. SocketInputStream incorrectly implements read().
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6899 RawLocalFileSystem#setWorkingDir() does not work for relative names
+     (Sanjay Radia)
+
+    HADOOP-6496. HttpServer sends wrong content-type for CSS files
+    (and others). (Todd Lipcon via tomwhite)
+
+    HADOOP-7057. IOUtils.readFully and IOUtils.skipFully have typo in
+    exception creation's message. (cos)
+
+Release 0.21.1 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-6934. Test for ByteWritable comparator.
+    (Johannes Zillmann via Eli Collins)
+
+    HADOOP-6786. test-patch needs to verify Herriot integrity (cos)
+
+  BUG FIXES
+
+    HADOOP-6925. BZip2Codec incorrectly implements read(). 
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6833. IPC leaks call parameters when exceptions thrown.
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6971. Clover build doesn't generate per-test coverage (cos)
+
+    HADOOP-6993. Broken link on cluster setup page of docs. (eli)
+
+    HADOOP-6944. [Herriot] Implement a functionality for getting proxy users
+    definitions like groups and hosts. (Vinay Thota via cos)
+
+    HADOOP-6954.  Sources JARs are not correctly published to the Maven
+    repository. (tomwhite)
+
+    HADOOP-7052. misspelling of threshold in conf/log4j.properties.
+    (Jingguo Yao via eli)
+
+    HADOOP-7053. wrong FSNamesystem Audit logging setting in 
+    conf/log4j.properties. (Jingguo Yao via eli)
+
+    HADOOP-7162. Remove a duplicated call to FileSystem.listStatus(..) in FsShell.
+    (Alexey Diomin via szetszwo)
+
+    HADOOP-7117. Remove fs.checkpoint.* from core-default.xml and replace
+    fs.checkpoint.* with dfs.namenode.checkpoint.* in documentations.
+    (Harsh J Chouraria via szetszwo)
+
+    HADOOP-7193. Correct the "fs -touchz" command help message.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7174. Null is displayed in the "fs -copyToLocal" command.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7194. Fix resource leak in IOUtils.copyBytes(..).
+    (Devaraj K via szetszwo)
+
+Release 0.21.0 - 2010-08-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4895. Remove deprecated methods DFSClient.getHints(..) and
+    DFSClient.isDirectory(..).  (szetszwo)
+
+    HADOOP-4941. Remove deprecated FileSystem methods: getBlockSize(Path f),
+    getLength(Path f) and getReplication(Path src).  (szetszwo)
+
+    HADOOP-4648. Remove obsolete, deprecated InMemoryFileSystem and
+    ChecksumDistributedFileSystem.  (cdouglas via szetszwo)
+
+    HADOOP-4940. Remove a deprecated method FileSystem.delete(Path f).  (Enis
+    Soztutar via szetszwo)
+
+    HADOOP-4010. Change semantics for LineRecordReader to read an additional
+    line per split- rather than moving back one character in the stream- to
+    work with splittable compression codecs. (Abdul Qadeer via cdouglas)
+
+    HADOOP-5094. Show hostname and separate live/dead datanodes in DFSAdmin
+    report.  (Jakob Homan via szetszwo)
+
+    HADOOP-4942. Remove deprecated FileSystem methods getName() and
+    getNamed(String name, Configuration conf).  (Jakob Homan via szetszwo)
+
+    HADOOP-5486. Removes the CLASSPATH string from the command line and instead
+    exports it in the environment. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2827. Remove deprecated NetUtils::getServerAddress. (cdouglas)
+
+    HADOOP-5681. Change examples RandomWriter and RandomTextWriter to 
+    use new mapreduce API. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5680. Change org.apache.hadoop.examples.SleepJob to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5699. Change org.apache.hadoop.examples.PiEstimator to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5720. Introduces new task types - JOB_SETUP, JOB_CLEANUP
+    and TASK_CLEANUP. Removes the isMap methods from TaskID/TaskAttemptID
+    classes. (ddas)
+
+    HADOOP-5668. Change TotalOrderPartitioner to use new API. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-5738. Split "waiting_tasks" JobTracker metric into waiting maps and
+    waiting reduces. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5679. Resolve findbugs warnings in core/streaming/pipes/examples. 
+    (Jothi Padmanabhan via sharad)
+
+    HADOOP-4359. Support for data access authorization checking on Datanodes.
+    (Kan Zhang via rangadi)
+
+    HADOOP-5690. Change org.apache.hadoop.examples.DBCountPageView to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5694. Change org.apache.hadoop.examples.dancing to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5696. Change org.apache.hadoop.examples.Sort to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5698. Change org.apache.hadoop.examples.MultiFileWordCount to 
+    use new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5913. Provide ability to an administrator to stop and start
+    job queues. (Rahul Kumar Singh and Hemanth Yamijala via yhemanth)
+
+    MAPREDUCE-711. Removed Distributed Cache from Common, to move it
+    under Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6201. Change FileSystem::listStatus contract to throw
+    FileNotFoundException if the directory does not exist, rather than letting
+    this be implementation-specific. (Jakob Homan via cdouglas)
+
+    HADOOP-6230. Moved process tree and memory calculator related classes
+    from Common to Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6203. FsShell rm/rmr error message indicates exceeding Trash quota
+    and suggests using -skipTrash, when moving to trash fails.
+    (Boris Shkolnik via suresh)
+
+    HADOOP-6303. Eclipse .classpath template has outdated jar files and is
+    missing some new ones.  (cos)
+
+    HADOOP-6396. Fix uninformative exception message when unable to parse
+    umask. (jghoman)
+
+    HADOOP-6299. Reimplement the UserGroupInformation to use the OS
+    specific and Kerberos JAAS login. (omalley)
+
+    HADOOP-6686. Remove redundant exception class name from the exception
+    message for the exceptions thrown at RPC client. (suresh)
+
+    HADOOP-6701. Fix incorrect exit codes returned from chmod, chown and chgrp
+    commands from FsShell. (Ravi Phulari via suresh)
+
+  NEW FEATURES
+
+    HADOOP-6332. Large-scale Automated Test Framework. (sharad, Sreekanth
+    Ramakrishnan, et al. via cos)
+
+    HADOOP-4268. Change fsck to use ClientProtocol methods so that the
+    corresponding permission requirement for running the ClientProtocol
+    methods will be enforced.  (szetszwo)
+
+    HADOOP-3953. Implement sticky bit for directories in HDFS. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4368. Implement df in FsShell to show the status of a FileSystem.
+    (Craig Macdonald via szetszwo)
+
+    HADOOP-3741. Add a web ui to the SecondaryNameNode for showing its status.
+    (szetszwo)
+
+    HADOOP-5018. Add pipelined writers to Chukwa. (Ari Rabkin via cdouglas)
+
+    HADOOP-5052. Add an example computing exact digits of pi using the
+    Bailey-Borwein-Plouffe algorithm. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4927. Adds a generic wrapper around outputformat to allow creation of
+    output on demand (Jothi Padmanabhan via ddas)
+
+    HADOOP-5144. Add a new DFSAdmin command for changing the setting of restore
+    failed storage replicas in namenode. (Boris Shkolnik via szetszwo)
+
+    HADOOP-5258. Add a new DFSAdmin command to print a tree of the rack and
+    datanode topology as seen by the namenode.  (Jakob Homan via szetszwo)
+    
+    HADOOP-4756. A command line tool to access JMX properties on NameNode
+    and DataNode. (Boris Shkolnik via rangadi)
+
+    HADOOP-4539. Introduce backup node and checkpoint node. (shv)
+
+    HADOOP-5363. Add support for proxying connections to multiple clusters with
+    different versions to hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5528. Add a configurable hash partitioner operating on ranges of
+    BinaryComparable keys. (Klaas Bosteels via shv)
+
+    HADOOP-5257. HDFS servers may start and stop external components through
+    a plugin interface. (Carlos Valiente via dhruba)
+
+    HADOOP-5450. Add application-specific data types to streaming's typed bytes
+    interface. (Klaas Bosteels via omalley)
+
+    HADOOP-5518. Add contrib/mrunit, a MapReduce unit test framework.
+    (Aaron Kimball via cutting)
+
+    HADOOP-5469.  Add /metrics servlet to daemons, providing metrics
+    over HTTP as either text or JSON.  (Philip Zeyliger via cutting)
+
+    HADOOP-5467. Introduce offline fsimage image viewer. (Jakob Homan via shv)
+
+    HADOOP-5752. Add a new hdfs image processor, Delimited, to oiv. (Jakob
+    Homan via szetszwo)
+
+    HADOOP-5266. Adds the capability to do mark/reset of the reduce values 
+    iterator in the Context object API. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5745. Allow setting the default value of maxRunningJobs for all
+    pools. (dhruba via matei)
+
+    HADOOP-5643. Adds a way to decommission TaskTrackers while the JobTracker
+    is running. (Amar Kamat via ddas)
+
+    HADOOP-4829. Allow FileSystem shutdown hook to be disabled.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5815. Sqoop: A database import tool for Hadoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4861. Add disk usage with human-readable size (-duh).
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5844. Use mysqldump when connecting to local mysql instance in Sqoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-5976. Add a new command, classpath, to the hadoop script.  (Owen
+    O'Malley and Gary Murry via szetszwo)
+
+    HADOOP-6120. Add support for Avro specific and reflect data.
+    (sharad via cutting)
+
+    HADOOP-6226. Moves BoundedByteArrayOutputStream from the tfile package to
+    the io package and makes it available to other users (MAPREDUCE-318). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-6105. Adds support for automatically handling deprecation of
+    configuration keys. (V.V.Chaitanya Krishna via yhemanth)
+    
+    HADOOP-6235. Adds new method to FileSystem for clients to get server
+    defaults. (Kan Zhang via suresh)
+
+    HADOOP-6234. Add new option dfs.umaskmode to set umask in configuration
+    to use octal or symbolic instead of decimal. (Jakob Homan via suresh)
+
+    HADOOP-5073. Add annotation mechanism for interface classification.
+    (Jakob Homan via suresh)
+
+    HADOOP-4012. Provide splitting support for bzip2 compressed files. (Abdul
+    Qadeer via cdouglas)
+
+    HADOOP-6246. Add backward compatibility support to use deprecated decimal 
+    umask from old configuration. (Jakob Homan via suresh)
+
+    HADOOP-4952. Add new improved file system interface FileContext for the
+    application writer (Sanjay Radia via suresh)
+
+    HADOOP-6170. Add facility to tunnel Avro RPCs through Hadoop RPCs.
+    This permits one to take advantage of both Avro's RPC versioning
+    features and Hadoop's proven RPC scalability.  (cutting)
+
+    HADOOP-6267. Permit building contrib modules located in external
+    source trees.  (Todd Lipcon via cutting)
+
+    HADOOP-6240. Add new FileContext rename operation that is posix compliant
+    and allows overwriting existing destination. (suresh)
+
+    HADOOP-6204. Implementing aspects development and fault injection
+    framework for Hadoop (cos)
+
+    HADOOP-6313. Implement Syncable interface in FSDataOutputStream to expose
+    flush APIs to application users. (Hairong Kuang via suresh)
+
+    HADOOP-6284. Add a new parameter, HADOOP_JAVA_PLATFORM_OPTS, to
+    hadoop-config.sh so that it allows setting java command options for
+    JAVA_PLATFORM.  (Koji Noguchi via szetszwo)
+
+    HADOOP-6337. Updates FilterInitializer class to be more visible,
+    and the init of the class is made to take a Configuration argument.
+    (Jakob Homan via ddas)
+
+    HADOOP-6223. Add new file system interface AbstractFileSystem with
+    implementation of some file systems that delegate to old FileSystem.
+    (Sanjay Radia via suresh)
+
+    HADOOP-6433. Introduce asynchronous deletion of files via a pool of
+    threads. This can be used to delete files in the Distributed
+    Cache. (Zheng Shao via dhruba)
+
+    HADOOP-6415. Adds a common token interface for both job token and 
+    delegation token. (Kan Zhang via ddas)
+
+    HADOOP-6408. Add a /conf servlet to dump running configuration.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6520. Adds APIs to read/write Token and secret keys. Also
+    adds the automatic loading of tokens into UserGroupInformation
+    upon login. The tokens are read from a file specified in the
+    environment variable. (ddas)
+
+    HADOOP-6419. Adds SASL based authentication to RPC.
+    (Kan Zhang via ddas)
+
+    HADOOP-6510. Adds a way for superusers to impersonate other users
+    in a secure environment. (Jitendra Nath Pandey via ddas)
+
+    HADOOP-6421. Adds Symbolic links to FileContext, AbstractFileSystem.
+    It also adds a limited implementation for the local file system
+     (RawLocalFs) that allows local symlinks. (Eli Collins via Sanjay Radia)
+
+    HADOOP-6577. Add hidden configuration option "ipc.server.max.response.size"
+    to change the default 1 MB, the maximum size when large IPC handler 
+    response buffer is reset. (suresh)
+
+    HADOOP-6568. Adds authorization for the default servlets. 
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-6586. Log authentication and authorization failures and successes
+    for RPC (boryas)
+
+    HADOOP-6580. UGI should contain authentication method. (jnp via boryas)
+    
+    HADOOP-6657. Add a capitalization method to StringUtils for MAPREDUCE-1545.
+    (Luke Lu via Steve Loughran)
+
+    HADOOP-6692. Add FileContext#listStatus that returns an iterator.
+    (hairong)
+
+    HADOOP-6869. Functionality to create file or folder on a remote daemon
+    side (Vinay Thota via cos)
+
+  IMPROVEMENTS
+
+    HADOOP-6798. Align Ivy version for all Hadoop subprojects. (cos)
+
+    HADOOP-6777. Implement functionality to suspend and resume a process.
+    (Vinay Thota via cos)
+
+    HADOOP-6772. Utilities specific to system tests. (Vinay Thota via cos)
+
+    HADOOP-6771. Herriot's artifact id for Maven deployment should be set to
+    hadoop-core-instrumented (cos)
+
+    HADOOP-6752. Remote cluster control functionality needs JavaDocs
+    improvement (Balaji Rajagopalan via cos).
+
+    HADOOP-4565. Added CombineFileInputFormat to use data locality information
+    to create splits. (dhruba via zshao)
+
+    HADOOP-4936. Improvements to TestSafeMode. (shv)
+
+    HADOOP-4985. Remove unnecessary "throw IOException" declarations in
+    FSDirectory related methods.  (szetszwo)
+
+    HADOOP-5017. Change NameNode.namesystem declaration to private.  (szetszwo)
+
+    HADOOP-4794. Add branch information from the source version control into
+    the version information that is compiled into Hadoop. (cdouglas via 
+    omalley)
+
+    HADOOP-5070. Increment copyright year to 2009, remove assertions of ASF
+    copyright to licensed files. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5037. Deprecate static FSNamesystem.getFSNamesystem().  (szetszwo)
+
+    HADOOP-5088. Include releaseaudit target as part of developer test-patch
+    target.  (Giridharan Kesavan via nigel)
+
+    HADOOP-2721. Uses setsid when creating new tasks so that subprocesses of 
+    this process will be within this new session (and this process will be 
+    the process leader for all the subprocesses). Killing the process leader,
+    or the main Java task in Hadoop's case, kills the entire subtree of
+    processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5097. Remove static variable JspHelper.fsn, a static reference to
+    a non-singleton FSNamesystem object.  (szetszwo)
+
+    HADOOP-3327. Improves handling of READ_TIMEOUT during map output copying.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5124. Choose datanodes randomly instead of starting from the first
+    datanode for providing fairness.  (hairong via szetszwo)
+
+    HADOOP-4930. Implement a Linux native executable that can be used to 
+    launch tasks as users. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5122. Fix format of fs.default.name value in libhdfs test conf.
+    (Craig Macdonald via tomwhite)
+
+    HADOOP-5038. Direct daemon trace to debug log instead of stdout. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5101. Improve packaging by adding 'all-jars' target building core,
+    tools, and example jars. Let findbugs depend on this rather than the 'tar'
+    target. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-4868. Splits the hadoop script into three parts - bin/hadoop, 
+    bin/mapred and bin/hdfs. (Sharad Agarwal via ddas)
+
+    HADOOP-1722. Adds support for TypedBytes and RawBytes in Streaming.
+    (Klaas Bosteels via ddas)
+
+    HADOOP-4220. Changes the JobTracker restart tests so that they take much
+    less time. (Amar Kamat via ddas)
+
+    HADOOP-4885. Try to restore failed name-node storage directories at 
+    checkpoint time. (Boris Shkolnik via shv)
+
+    HADOOP-5209. Update year to 2009 for javadoc.  (szetszwo)
+
+    HADOOP-5279. Remove unnecessary targets from test-patch.sh.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5120. Remove the use of FSNamesystem.getFSNamesystem() from 
+    UpgradeManagerNamenode and UpgradeObjectNamenode.  (szetszwo)
+
+    HADOOP-5222. Add offset to datanode clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5240. Skip re-building javadoc when it is already
+    up-to-date. (Aaron Kimball via cutting)
+
+    HADOOP-5042. Add a cleanup stage to log rollover in Chukwa appender.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-5264. Removes redundant configuration object from the TaskTracker.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5232. Enable patch testing to occur on more than one host.
+    (Giri Kesavan via nigel)
+
+    HADOOP-4546. Fix DF reporting for AIX. (Bill Habermaas via cdouglas)
+
+    HADOOP-5023. Add Tomcat support to HdfsProxy. (Zhiyong Zhang via cdouglas)
+    
+    HADOOP-5317. Provide documentation for LazyOutput Feature. 
+    (Jothi Padmanabhan via johan)
+
+    HADOOP-5455. Document rpc metrics context to the extent dfs, mapred, and
+    jvm contexts are documented. (Philip Zeyliger via cdouglas)
+
+    HADOOP-5358. Provide scripting functionality to the synthetic load
+    generator. (Jakob Homan via hairong)
+
+    HADOOP-5442. Paginate jobhistory display and added some search
+    capabilities. (Amar Kamat via acmurthy) 
+
+    HADOOP-4842. Streaming now allows specifying a command for the combiner.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5196. avoiding unnecessary byte[] allocation in 
+    SequenceFile.CompressedBytes and SequenceFile.UncompressedBytes.
+    (hong tang via mahadev)
+
+    HADOOP-4655. New method FileSystem.newInstance() that always returns
+    a newly allocated FileSystem object. (dhruba)
+
+    HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each
+    heartbeat by default. (matei)
+
+    HADOOP-5491.  In contrib/index, better control memory usage.
+    (Ning Li via cutting)
+
+    HADOOP-5423. Include option of preserving file metadata in
+    SequenceFile::sort. (Michael Tamm via cdouglas)
+
+    HADOOP-5331. Add support for KFS appends. (Sriram Rao via cdouglas)
+
+    HADOOP-4365. Make Configuration::getProps protected in support of
+    meaningful subclassing. (Steve Loughran via cdouglas)
+
+    HADOOP-2413. Remove the static variable FSNamesystem.fsNamesystemObject.
+    (Konstantin Shvachko via szetszwo)
+
+    HADOOP-4584. Improve datanode block reports and associated file system
+    scan to avoid interfering with normal datanode operations.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5502. Documentation for backup and checkpoint nodes.
+    (Jakob Homan via shv)
+
+    HADOOP-5485. Mask actions in the fair scheduler's servlet UI based on
+    value of webinterface.private.actions. 
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5581. HDFS should throw FileNotFoundException while opening
+    a file that does not exist. (Brian Bockelman via rangadi)
+
+    HADOOP-5509. PendingReplicationBlocks does not start monitor in the
+    constructor. (shv)
+
+    HADOOP-5494. Modify sorted map output merger to lazily read values,
+    rather than buffering at least one record for each segment. (Devaraj Das
+    via cdouglas)
+
+    HADOOP-5396. Provide ability to refresh queue ACLs in the JobTracker
+    without having to restart the daemon.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4490. Provide ability to run tasks as job owners.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5625. Add operation duration to clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5705. Improve TotalOrderPartitioner efficiency by updating the trie
+    construction. (Dick King via cdouglas)
+
+    HADOOP-5589. Eliminate source limit of 64 for map-side joins imposed by
+    TupleWritable encoding. (Jingkei Ly via cdouglas)
+
+    HADOOP-5734. Correct block placement policy description in HDFS
+    Design document. (Konstantin Boudnik via shv)
+
+    HADOOP-5657. Validate data in TestReduceFetch to improve merge test
+    coverage. (cdouglas)
+
+    HADOOP-5613. Change S3Exception to checked exception.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5717. Create public enum class for the Framework counters in 
+    org.apache.hadoop.mapreduce. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5217. Split AllTestDriver for core, hdfs and mapred. (sharad)
+
+    HADOOP-5364. Add certificate expiration warning to HsftpFileSystem and HDFS
+    proxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5733. Add map/reduce slot capacity and blacklisted capacity to
+    JobTracker metrics. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5596. Add EnumSetWritable. (He Yongqiang via szetszwo)
+
+    HADOOP-5727. Simplify hashcode for ID types. (Shevek via cdouglas)
+
+    HADOOP-5500. In DBOutputFormat, where field names are absent permit the
+    number of fields to be sufficient to construct the select query. (Enis
+    Soztutar via cdouglas)
+
+    HADOOP-5081. Split TestCLI into HDFS, Mapred and Core tests. (sharad)
+
+    HADOOP-5015. Separate block management code from FSNamesystem.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5080. Add new test cases to TestMRCLI and TestHDFSCLI
+    (V.Karthikeyan via nigel)
+
+    HADOOP-5135. Splits the tests into different directories based on the 
+    package. Four new test targets have been defined - run-test-core, 
+    run-test-mapred, run-test-hdfs and run-test-hdfs-with-mr.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5771. Implements unit tests for LinuxTaskController.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5419. Provide a facility to query the Queue ACLs for the
+    current user.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5780. Improve per block message printed by "-metaSave" in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5823. Added a new class DeprecatedUTF8 to help with removing
+    UTF8 related javac warnings. These warnings are removed in 
+    FSEditLog.java as a use case. (Raghu Angadi)
+
+    HADOOP-5824. Deprecate DataTransferProtocol.OP_READ_METADATA and remove
+    the corresponding unused codes.  (Kan Zhang via szetszwo)
+
+    HADOOP-5721. Factor out EditLogFileInputStream and EditLogFileOutputStream
+    into independent classes. (Luca Telloli & Flavio Junqueira via shv)
+
+    HADOOP-5838. Fix a few javac warnings in HDFS. (Raghu Angadi)
+
+    HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer 
+    via sharad)
+
+    HADOOP-5858. Eliminate UTF8 and fix warnings in test/hdfs-with-mr package.
+    (shv)
+
+    HADOOP-5866. Move DeprecatedUTF8 from o.a.h.io to o.a.h.hdfs since it may
+    not be used outside hdfs. (Raghu Angadi)
+
+    HADOOP-5857. Move normal java methods from hdfs .jsp files to .java files.
+    (szetszwo)
+
+    HADOOP-5873. Remove deprecated methods randomDataNode() and
+    getDatanodeByIndex(..) in FSNamesystem.  (szetszwo)
+
+    HADOOP-5572. Improves the progress reporting for the sort phase for both
+    maps and reduces. (Ravi Gummadi via ddas)
+
+    HADOOP-5839. Fix EC2 scripts to allow remote job submission.
+    (Joydeep Sen Sarma via tomwhite)
+
+    HADOOP-5877. Fix javac warnings in TestHDFSServerPorts, TestCheckpoint, 
+    TestNameEditsConfig, TestStartup and TestStorageRestore.
+    (Jakob Homan via shv)
+
+    HADOOP-5438. Provide a single FileSystem method to create or 
+    open-for-append to a file.  (He Yongqiang via dhruba)
+
+    HADOOP-5472. Change DistCp to support globbing of input paths.  (Dhruba
+    Borthakur and Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5175. Don't unpack libjars on classpath. (Todd Lipcon via tomwhite)
+
+    HADOOP-5620. Add an option to DistCp for preserving modification and access
+    times.  (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5664. Change map serialization so a lock is obtained only where
+    contention is possible, rather than for each write. (cdouglas)
+
+    HADOOP-5896. Remove the dependency of GenericOptionsParser on 
+    Option.withArgPattern. (Giridharan Kesavan and Sharad Agarwal via 
+    sharad)
+
+    HADOOP-5784. Makes the number of heartbeats that should arrive a second
+    at the JobTracker configurable. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5955. Changes TestFileOutputFormat so that it uses LOCAL_MR
+    instead of CLUSTER_MR. (Jothi Padmanabhan via das)
+
+    HADOOP-5948. Changes TestJavaSerialization to use LocalJobRunner 
+    instead of MiniMR/DFS cluster. (Jothi Padmanabhan via das)
+
+    HADOOP-2838. Add mapred.child.env to pass environment variables to 
+    tasktracker's child processes. (Amar Kamat via sharad)
+
+    HADOOP-5961. DataNode process understands generic hadoop command line
+    options (like -Ddfs.property=value). (Raghu Angadi)
+
+    HADOOP-5938. Change org.apache.hadoop.mapred.jobcontrol to use new
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-2141. Improves the speculative execution heuristic. The heuristic
+    is currently based on the progress-rates of tasks and the expected time
+    to complete. Also, statistics about trackers are collected, and speculative
+    tasks are not given to the ones deduced to be slow. 
+    (Andy Konwinski and ddas)
+
+    HADOOP-5952. Change "-1 tests included" wording in test-patch.sh.
+    (Gary Murry via szetszwo)
+
+    HADOOP-6106. Provides an option in ShellCommandExecutor to timeout 
+    commands that do not complete within a certain amount of time.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5925. EC2 scripts should exit on error. (tomwhite)
+
+    HADOOP-6109. Change Text to grow its internal buffer exponentially, rather
+    than the max of the current length and the proposed length to improve
+    performance reading large values. (thushara wijeratna via cdouglas)
+
+    HADOOP-2366. Support trimmed strings in Configuration.  (Michele Catasta
+    via szetszwo)
+
+    HADOOP-6099. The RPC module can be configured to not send periodic pings.
+    The default behaviour of sending periodic pings remains unchanged. (dhruba)
+
+    HADOOP-6142. Update documentation and use of harchives for relative paths
+    added in MAPREDUCE-739. (Mahadev Konar via cdouglas)
+
+    HADOOP-6148. Implement a fast, pure Java CRC32 calculator which outperforms
+    java.util.zip.CRC32.  (Todd Lipcon and Scott Carey via szetszwo)
+
+    HADOOP-6146. Upgrade to JetS3t version 0.7.1. (tomwhite)
+
+    HADOOP-6161. Add get/setEnum methods to Configuration. (cdouglas)
+
+    HADOOP-6160. Fix releaseaudit target to run on specific directories.
+    (gkesavan)
+    
+    HADOOP-6169. Removing deprecated method calls in TFile. (hong tang via 
+    mahadev)
+
+    HADOOP-6176. Add a couple package private methods to AccessTokenHandler
+    for testing.  (Kan Zhang via szetszwo)
+
+    HADOOP-6182. Fix ReleaseAudit warnings (Giridharan Kesavan and Lee Tucker
+    via gkesavan)
+
+    HADOOP-6173. Change src/native/packageNativeHadoop.sh to package all
+    native library files.  (Hong Tang via szetszwo)
+
+    HADOOP-6184. Provide an API to dump Configuration in a JSON format.
+    (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6224. Add a method to WritableUtils performing a bounded read of an
+    encoded String. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-6133. Add a caching layer to Configuration::getClassByName to
+    alleviate a performance regression introduced in a compatibility layer.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6252. Provide a method to determine if a deprecated key is set in
+    config file. (Jakob Homan via suresh)
+
+    HADOOP-5879. Read compression level and strategy from Configuration for
+    gzip compression. (He Yongqiang via cdouglas)
+
+    HADOOP-6216. Support comments in host files.  (Ravi Phulari and Dmytro
+    Molkov via szetszwo)
+
+    HADOOP-6217. Update documentation for project split. (Corinne Chandel via 
+    omalley)
+
+    HADOOP-6268. Add ivy jar to .gitignore. (Todd Lipcon via cdouglas)
+
+    HADOOP-6270. Support deleteOnExit in FileContext.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-6233. Rename configuration keys towards API standardization and
+    backward compatibility. (Jithendra Pandey via suresh)
+
+    HADOOP-6260. Add additional unit tests for FileContext util methods.
+    (Gary Murry via suresh).
+
+    HADOOP-6309. Change build.xml to run tests with java asserts.  (Eli
+    Collins via szetszwo)
+
+    HADOOP-6326. Hudson runs should check for AspectJ warnings and report
+    failure if any is present (cos)
+
+    HADOOP-6329. Add build-fi directory to the ignore lists.  (szetszwo)
+
+    HADOOP-5107. Use Maven ant tasks to publish the subproject jars.
+    (Giridharan Kesavan via omalley)
+
+    HADOOP-6343. Log unexpected throwable object caught in RPC.  (Jitendra Nath
+    Pandey via szetszwo)
+
+    HADOOP-6367. Removes Access Token implementation from common.
+    (Kan Zhang via ddas)
+
+    HADOOP-6395. Upgrade some libraries to be consistent across common, hdfs,
+    and mapreduce. (omalley)
+
+    HADOOP-6398. Build is broken after HADOOP-6395 patch has been applied (cos)
+
+    HADOOP-6413. Move TestReflectionUtils to Common. (Todd Lipcon via tomwhite)
+
+    HADOOP-6283. Improve the exception messages thrown by
+    FileUtil$HardLink.getLinkCount(..).  (szetszwo)
+
+    HADOOP-6279. Add Runtime::maxMemory to JVM metrics. (Todd Lipcon via
+    cdouglas)
+
+    HADOOP-6305. Unify build property names to facilitate cross-projects
+    modifications (cos)
+
+    HADOOP-6312. Remove unnecessary debug logging in Configuration constructor.
+    (Aaron Kimball via cdouglas)
+
+    HADOOP-6366. Reduce ivy console output to observable level (cos)
+
+    HADOOP-6400. Log errors getting Unix UGI. (Todd Lipcon via tomwhite)
+
+    HADOOP-6346. Add support for specifying unpack pattern regex to
+    RunJar.unJar. (Todd Lipcon via tomwhite)
+
+    HADOOP-6422. Make RPC backend pluggable, protocol-by-protocol, to
+    ease evolution towards Avro.  (cutting)
+
+    HADOOP-5958. Use JDK 1.6 File APIs in DF.java wherever possible.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6222. Core doesn't have TestCommonCLI facility. (cos)
+
+    HADOOP-6394. Add a helper class to simplify FileContext related tests and
+    improve code reusability. (Jitendra Nath Pandey via suresh)
+
+    HADOOP-4656. Add a user to groups mapping service. (boryas, acmurthy)
+
+    HADOOP-6435. Make RPC.waitForProxy with timeout public. (Steve Loughran
+    via tomwhite)
+  
+    HADOOP-6472. Add tokenCache option to GenericOptionsParser for passing a
+    file with secret keys to a map reduce job. (boryas)
+
+    HADOOP-3205. Read multiple chunks directly from FSInputChecker subclass
+    into user buffers. (Todd Lipcon via tomwhite)
+
+    HADOOP-6479. TestUTF8 assertions could fail with better text.
+    (Steve Loughran via tomwhite)
+
+    HADOOP-6155. Deprecate RecordIO anticipating Avro. (Tom White via cdouglas)
+
+    HADOOP-6492. Make some Avro serialization APIs public.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6497. Add an adapter for Avro's SeekableInput interface, so
+    that Avro can read FileSystem data.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6495.  Identifier should be serialized after the password is
+    created in the Token constructor. (jnp via boryas)
+
+    HADOOP-6518. Makes the UGI honor the env var KRB5CCNAME. 
+    (Owen O'Malley via ddas)
+
+    HADOOP-6531. Enhance FileUtil with an API to delete all contents of a
+    directory. (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6547. Move DelegationToken into Common, so that it can be used by
+    MapReduce also. (devaraj via omalley)
+
+    HADOOP-6552. Puts renewTGT=true and useTicketCache=true for the keytab
+    kerberos options. (ddas)
+
+    HADOOP-6534. Trim whitespace from directory lists initializing
+    LocalDirAllocator. (Todd Lipcon via cdouglas)
+
+    HADOOP-6559. Makes the RPC client automatically re-login when the SASL 
+    connection setup fails. This is applicable only to keytab based logins.
+    (Devaraj Das)
+
+    HADOOP-6551. Delegation token renewing and cancelling should provide
+    meaningful exceptions when there are failures instead of returning 
+    false. (omalley)
+
+    HADOOP-6583. Captures authentication and authorization metrics. (ddas)
+
+    HADOOP-6543. Allows secure clients to talk to unsecure clusters. 
+    (Kan Zhang via ddas)
+
+    HADOOP-6579. Provide a mechanism for encoding/decoding Tokens from
+    a url-safe string and change the commons-codec library to 1.4. (omalley)
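+
+    The general idea, sketched with the commons-codec 1.4 URL-safe Base64
+    helpers (an illustration, not the actual Token encode/decode code):
+
+      import org.apache.commons.codec.binary.Base64;
+
+      /** Illustrative only: round-trip opaque bytes through a URL-safe string. */
+      public class UrlSafeTokenSketch {
+        static String encodeToUrlString(byte[] tokenBytes) {
+          // URL-safe alphabet ('-' and '_' instead of '+' and '/'), no padding.
+          return Base64.encodeBase64URLSafeString(tokenBytes);
+        }
+
+        static byte[] decodeFromUrlString(String s) {
+          return Base64.decodeBase64(s);
+        }
+
+        public static void main(String[] args) {
+          byte[] identifier = {0x01, (byte) 0xfe, 0x7f, 0x00, 0x3f};
+          String urlSafe = encodeToUrlString(identifier);
+          System.out.println(urlSafe);                             // safe in a URL
+          System.out.println(decodeFromUrlString(urlSafe).length); // 5
+        }
+      }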
+
+    HADOOP-6596. Add a version field to the AbstractDelegationTokenIdentifier's
+    serialized value. (omalley)
+
+    HADOOP-6573. Support for persistent delegation tokens.
+    (Jitendra Pandey via shv)
+
+    HADOOP-6594. Provide a fetchdt tool via bin/hdfs. (jhoman via acmurthy) 
+
+    HADOOP-6589. Provide better error messages when RPC authentication fails.
+    (Kan Zhang via omalley)
+
+    HADOOP-6599. Split existing RpcMetrics into RpcMetrics & RpcDetailedMetrics.
+    (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6537. Declare more detailed exceptions in FileContext and
+    AbstractFileSystem (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6486. fix common classes to work with Avro 1.3 reflection.
+    (cutting via tomwhite)
+
+    HADOOP-6591. HarFileSystem can handle paths with whitespace characters.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6407. Have a way to automatically update Eclipse .classpath file
+    when new libs are added to the classpath through Ivy. (tomwhite)
+
+    HADOOP-3659. Patch to allow hadoop native to compile on Mac OS X.
+    (Colin Evans and Allen Wittenauer via tomwhite)
+
+    HADOOP-6471. StringBuffer -> StringBuilder - conversion of references
+    as necessary. (Kay Kay via tomwhite)
+
+    HADOOP-6646. Move HarfileSystem out of Hadoop Common. (mahadev)
+
+    HADOOP-6566. Add methods supporting, enforcing narrower permissions on
+    local daemon directories. (Arun Murthy and Luke Lu via cdouglas)
+
+    HADOOP-6705. Fix to work with 1.5 version of jiracli
+    (Giridharan Kesavan)
+
+    HADOOP-6658. Exclude Private elements from generated Javadoc. (tomwhite)
+
+    HADOOP-6635. Install/deploy source jars to Maven repo. 
+    (Patrick Angeles via jghoman)
+
+    HADOOP-6717. Log levels in o.a.h.security.Groups too high 
+    (Todd Lipcon via jghoman)
+
+    HADOOP-6667. RPC.waitForProxy should retry through NoRouteToHostException.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6677. InterfaceAudience.LimitedPrivate should take a string not an
+    enum. (tomwhite)
+
+    HADOOP-6678. Remove FileContext#isFile, isDirectory, and exists.
+    (Eli Collins via hairong)
+
+    HADOOP-6515. Make maximum number of http threads configurable.
+    (Scott Chen via zshao)
+
+    HADOOP-6563. Add more symlink tests to cover intermediate symlinks
+    in paths. (Eli Collins via suresh)
+
+    HADOOP-6585.  Add FileStatus#isDirectory and isFile.  (Eli Collins via
+    tomwhite)
+
+    HADOOP-6738.  Move cluster_setup.xml from MapReduce to Common.
+    (Tom White via tomwhite)
+
+    HADOOP-6794. Move configuration and script files post split. (tomwhite)
+
+    HADOOP-6403.  Deprecate EC2 bash scripts.  (tomwhite)
+
+    HADOOP-6769. Add an API in FileSystem to get FileSystem instances based 
+    on users. (ddas via boryas)
+
+    HADOOP-6813. Add a new newInstance method in FileSystem that takes 
+    a "user" as argument (ddas via boryas)
+
+    HADOOP-6668.  Apply audience and stability annotations to classes in
+    common.  (tomwhite)
+
+    HADOOP-6821.  Document changes to memory monitoring.  (Hemanth Yamijala
+    via tomwhite)
+
+  OPTIMIZATIONS
+
+    HADOOP-5595. NameNode does not need to run a replicator to choose a
+    random DataNode. (hairong)
+
+    HADOOP-5603. Improve NameNode's block placement performance. (hairong)
+
+    HADOOP-5638. More improvement on block placement performance. (hairong)
+
+    HADOOP-6180. NameNode slowed down when many files with same filename
+    were moved to Trash. (Boris Shkolnik via hairong)
+
+    HADOOP-6166. Further improve the performance of the pure-Java CRC32
+    implementation. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-6271. Add recursive and non recursive create and mkdir to 
+    FileContext. (Sanjay Radia via suresh)
+
+    HADOOP-6261. Add URI based tests for FileContext. 
+    (Ravi Phulari via suresh).
+
+    HADOOP-6307. Add a new SequenceFile.Reader constructor in order to support
+    reading on un-closed file.  (szetszwo)
+
+    HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).
+    (mahadev via szetszwo)
+
+    HADOOP-6569. FsShell#cat should avoid calling unnecessary getFileStatus
+    before opening a file to read. (hairong)
+
+    HADOOP-6689. Add directory renaming test to existing FileContext tests.
+    (Eli Collins via suresh)
+
+    HADOOP-6713. The RPC server Listener thread is a scalability bottleneck.
+    (Dmytro Molkov via hairong)
+
+  BUG FIXES
+
+    HADOOP-6748. Removes hadoop.cluster.administrators, cluster administrators
+    acl is passed as parameter in constructor. (amareshwari) 
+
+    HADOOP-6828. Herriot uses old way of accessing logs directories (Sreekanth
+    Ramakrishnan via cos)
+
+    HADOOP-6788. [Herriot] Exception exclusion functionality is not working
+    correctly. (Vinay Thota via cos)
+
+    HADOOP-6773. Ivy folder contains redundant files (cos)
+
+    HADOOP-5379. CBZip2InputStream to throw IOException on data crc error.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-5326. Fixes CBZip2OutputStream data corruption problem.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-4963. Fixes a logging to do with getting the location of
+    map output file. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2337. Trash should close FileSystem on exit and should not start 
+    emptying thread if disabled. (shv)
+
+    HADOOP-5072. Fix failure in TestCodec because testSequenceFileGzipCodec 
+    won't pass without native gzip codec. (Zheng Shao via dhruba)
+
+    HADOOP-5050. TestDFSShell.testFilePermissions should not assume umask
+    setting.  (Jakob Homan via szetszwo)
+
+    HADOOP-4975. Set classloader for nested mapred.join configs. (Jingkei Ly
+    via cdouglas)
+
+    HADOOP-5078. Remove invalid AMI kernel in EC2 scripts. (tomwhite)
+
+    HADOOP-5045. FileSystem.isDirectory() should not be deprecated.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4960. Use datasource time, rather than system time, during metrics
+    demux. (Eric Yang via cdouglas)
+
+    HADOOP-5032. Export conf dir set in config script. (Eric Yang via cdouglas)
+
+    HADOOP-5176. Fix a typo in TestDFSIO.  (Ravi Phulari via szetszwo)
+
+    HADOOP-4859. Distinguish daily rolling output dir by adding a timestamp.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-4959. Correct system metric collection from top on Redhat 5.1. (Eric
+    Yang via cdouglas)
+
+    HADOOP-5039. Fix log rolling regex to process only the relevant
+    subdirectories. (Jerome Boulon via cdouglas)
+
+    HADOOP-5095. Update Chukwa watchdog to accept config parameter. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5147. Correct reference to agent list in Chukwa bin scripts. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5148. Fix logic disabling watchdog timer in Chukwa daemon scripts.
+    (Ari Rabkin via cdouglas)
+
+    HADOOP-5100. Append, rather than truncate, when creating log4j metrics in
+    Chukwa. (Jerome Boulon via cdouglas)
+
+    HADOOP-5204. Fix broken trunk compilation on Hudson by letting 
+    task-controller be an independent target in build.xml.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5212. Fix the path translation problem introduced by HADOOP-4868 
+    running on cygwin. (Sharad Agarwal via omalley)
+
+    HADOOP-5226. Add license headers to html and jsp files.  (szetszwo)
+
+    HADOOP-5172. Disable misbehaving Chukwa unit test until it can be fixed.
+    (Jerome Boulon via nigel)
+
+    HADOOP-4933. Fixes a ConcurrentModificationException problem that shows up
+    when the history viewer is accessed concurrently. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5253. Remove duplicate call to cn-docs target. 
+    (Giri Kesavan via nigel)
+
+    HADOOP-5251. Fix classpath for contrib unit tests to include clover jar.
+    (nigel)
+
+    HADOOP-5206. Synchronize "unprotected*" methods of FSDirectory on the root.
+    (Jakob Homan via shv)
+
+    HADOOP-5292. Fix NPE in KFS::getBlockLocations. (Sriram Rao via lohit)
+
+    HADOOP-5219. Adds a new property io.seqfile.local.dir for use by
+    SequenceFile, which earlier used mapred.local.dir. (Sharad Agarwal
+    via ddas)
+
+    HADOOP-5300. Fix ant javadoc-dev target and the typo in the class name
+    NameNodeActivtyMBean.  (szetszwo)
+
+    HADOOP-5218.  libhdfs unit test failed because it was unable to 
+    start namenode/datanode. Fixed. (dhruba)
+
+    HADOOP-5273. Add license header to TestJobInProgress.java.  (Jakob Homan
+    via szetszwo)
+    
+    HADOOP-5229. Remove duplicate version variables in build files
+    (Stefan Groschupf via johan)
+
+    HADOOP-5383. Avoid building an unused string in NameNode's 
+    verifyReplication(). (Raghu Angadi)
+
+    HADOOP-5347. Create a job output directory for the bbp examples. (szetszwo)
+
+    HADOOP-5341. Make hadoop-daemon scripts backwards compatible with the
+    changes in HADOOP-4868. (Sharad Agarwal via yhemanth)
+
+    HADOOP-5456. Fix javadoc links to ClientProtocol#restoreFailedStorage(..).
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-5458. Remove leftover Chukwa entries from build, etc. (cdouglas)
+
+    HADOOP-5386. Modify hdfsproxy unit test to start on a random port,
+    implement clover instrumentation. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5511. Add Apache License to EditLogBackupOutputStream. (shv)
+
+    HADOOP-5507. Fix JMXGet javadoc warnings.  (Boris Shkolnik via szetszwo)
+
+    HADOOP-5191. Accessing HDFS with any ip or hostname should work as long 
+    as it points to the interface NameNode is listening on. (Raghu Angadi)
+
+    HADOOP-5561. Add javadoc.maxmemory parameter to build, preventing OOM
+    exceptions from javadoc-dev. (Jakob Homan via cdouglas)
+
+    HADOOP-5149. Modify HistoryViewer to ignore unfamiliar files in the log
+    directory. (Hong Tang via cdouglas)
+
+    HADOOP-5477. Fix rare failure in TestCLI for hosts returning variations of
+    'localhost'. (Jakob Homan via cdouglas)
+
+    HADOOP-5194. Disables setsid for tasks run on cygwin. 
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5322. Fix misleading/outdated comments in JobInProgress.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-5198. Fixes a problem to do with the task PID file being absent and 
+    the JvmManager trying to look for it. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5464. DFSClient did not treat write timeout of 0 properly.
+    (Raghu Angadi)
+
+    HADOOP-4045. Fix processing of IO errors in EditsLog.
+    (Boris Shkolnik via shv)
+
+    HADOOP-5462. Fixed a double free bug in the task-controller
+    executable. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5652. Fix a bug where in-memory segments are incorrectly retained in
+    memory. (cdouglas)
+
+    HADOOP-5533. Recovery duration shown on the jobtracker webpage is 
+    inaccurate. (Amar Kamat via sharad)
+
+    HADOOP-5647. Fix TestJobHistory to not depend on /tmp. (Ravi Gummadi 
+    via sharad)
+
+    HADOOP-5661. Fixes some findbugs warnings in o.a.h.mapred* packages and
+    suppresses a bunch of them. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5704. Fix compilation problems in TestFairScheduler and
+    TestCapacityScheduler.  (Chris Douglas via szetszwo)
+
+    HADOOP-5650. Fix safemode messages in the Namenode log.  (Suresh Srinivas
+    via szetszwo)
+
+    HADOOP-5488. Removes the pidfile management for the Task JVM from the
+    framework and instead passes the PID back and forth between the
+    TaskTracker and the Task processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5658. Fix Eclipse templates. (Philip Zeyliger via shv)
+
+    HADOOP-5709. Remove redundant synchronization added in HADOOP-5661. (Jothi
+    Padmanabhan via cdouglas)
+
+    HADOOP-5715. Add conf/mapred-queue-acls.xml to the ignore lists.
+    (szetszwo)
+
+    HADOOP-5592. Fix typo in Streaming doc in reference to GzipCodec.
+    (Corinne Chandel via tomwhite)
+
+    HADOOP-5656. Counter for S3N Read Bytes does not work. (Ian Nowland
+    via tomwhite)
+
+    HADOOP-5406. Fix JNI binding for ZlibCompressor::setDictionary. (Lars
+    Francke via cdouglas)
+
+    HADOOP-3426. Fix/provide handling when DNS lookup fails on the loopback
+    address. Also cache the result of the lookup. (Steve Loughran via cdouglas)
+
+    HADOOP-5476. Close the underlying InputStream in SequenceFile::Reader when
+    the constructor throws an exception. (Michael Tamm via cdouglas)
+
+    HADOOP-5675. Do not launch a job if DistCp has no work to do. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-5737. Fixes a problem in the way the JobTracker used to talk to
+    other daemons like the NameNode to get the job's files. Also adds APIs
+    in the JobTracker to get the FileSystem objects as per the JobTracker's
+    configuration. (Amar Kamat via ddas) 
+
+    HADOOP-5648. Not able to generate gridmix.jar on the already compiled 
+    version of hadoop. (gkesavan)	
+
+    HADOOP-5808. Fix import never used javac warnings in hdfs. (szetszwo)
+
+    HADOOP-5203. TT's version build is too restrictive. (Rick Cox via sharad)
+
+    HADOOP-5818. Revert the renaming from FSNamesystem.checkSuperuserPrivilege
+    to checkAccess by HADOOP-5643.  (Amar Kamat via szetszwo)
+
+    HADOOP-5820. Fix findbugs warnings for http related codes in hdfs.
+    (szetszwo)
+
+    HADOOP-5822. Fix javac warnings in several dfs tests related to unnecessary
+    casts.  (Jakob Homan via szetszwo)
+
+    HADOOP-5842. Fix a few javac warnings under packages fs and util.
+    (Hairong Kuang via szetszwo)
+
+    HADOOP-5845. Build successful despite test failure on test-core target.
+    (sharad)
+
+    HADOOP-5314. Prevent unnecessary saving of the file system image during 
+    name-node startup. (Jakob Homan via shv)
+
+    HADOOP-5855. Fix javac warnings for DisallowedDatanodeException and
+    UnsupportedActionException.  (szetszwo)
+
+    HADOOP-5582. Fixes a problem in Hadoop Vaidya to do with reading
+    counters from job history files. (Suhas Gogate via ddas)
+
+    HADOOP-5829. Fix javac warnings found in ReplicationTargetChooser,
+    FSImage, Checkpointer, SecondaryNameNode and a few other hdfs classes.
+    (Suresh Srinivas via szetszwo)
+
+    HADOOP-5835. Fix findbugs warnings found in Block, DataNode, NameNode and
+    a few other hdfs classes.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5853. Undeprecate HttpServer.addInternalServlet method.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5801. Fixes the problem: If the hosts file is changed across restart
+    then it should be refreshed upon recovery so that the excluded hosts are 
+    lost and the maps are re-executed. (Amar Kamat via ddas)
+
+    HADOOP-5841. Resolve findbugs warnings in DistributedFileSystem,
+    DatanodeInfo, BlocksMap, DataNodeDescriptor.  (Jakob Homan via szetszwo)
+
+    HADOOP-5878. Fix import and Serializable javac warnings found in hdfs jsp.
+    (szetszwo)
+
+    HADOOP-5782. Revert a few formatting changes introduced in HADOOP-5015.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5687. NameNode throws NPE if fs.default.name is the default value.
+    (Philip Zeyliger via shv)
+
+    HADOOP-5867. Fix javac warnings found in NNBench and NNBenchWithoutMR.
+    (Konstantin Boudnik via szetszwo)
+    
+    HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
+    (Wang Xu via johan)
+
+    HADOOP-5847. Fixed failing Streaming unit tests (gkesavan) 
+
+    HADOOP-5252. Streaming overrides -inputformat option (Klaas Bosteels 
+    via sharad)
+
+    HADOOP-5710. Counter MAP_INPUT_BYTES missing from new mapreduce api. 
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5809. Fix job submission, broken by errant directory creation.
+    (Sreekanth Ramakrishnan and Jothi Padmanabhan via cdouglas)
+
+    HADOOP-5635. Change distributed cache to work with other distributed file
+    systems. (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5856. Fix "unsafe multithreaded use of DateFormat" findbugs warning
+    in DataBlockScanner.  (Kan Zhang via szetszwo)
+
+    HADOOP-4864. Fixes a problem to do with -libjars with multiple jars when
+    client and cluster reside on different OSs. (Amareshwari Sriramadasu via 
+    ddas)
+
+    HADOOP-5623. Fixes a problem to do with status messages getting overwritten
+    in streaming jobs. (Rick Cox and Jothi Padmanabhan via ddas)
+
+    HADOOP-5895. Fixes computation of count of merged bytes for logging.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5805. problem using top level s3 buckets as input/output 
+    directories. (Ian Nowland via tomwhite)
+   
+    HADOOP-5940. trunk eclipse-plugin build fails while trying to copy 
+    commons-cli jar from the lib dir (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5864. Fix DMI and OBL findbugs in packages hdfs and metrics.
+    (hairong)
+
+    HADOOP-5935. Fix Hudson's release audit warnings link is broken. 
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5947. Delete empty TestCombineFileInputFormat.java
+
+    HADOOP-5899. Move a log message in FSEditLog to the right place for
+    avoiding unnecessary log.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5944. Add Apache license header to BlockManager.java.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5891. SecondaryNamenode is able to converse with the NameNode 
+    even when the default value of dfs.http.address is not overridden.
+    (Todd Lipcon via dhruba)
+
+    HADOOP-5953. The isDirectory(..) and isFile(..) methods in KosmosFileSystem
+    should not be deprecated.  (szetszwo)
+
+    HADOOP-5954. Fix javac warnings in TestFileCreation, TestSmallBlock,
+    TestFileStatus, TestDFSShellGenericOptions, TestSeekBug and
+    TestDFSStartupVersions.  (szetszwo)
+
+    HADOOP-5956. Fix ivy dependency in hdfsproxy and capacity-scheduler.
+    (Giridharan Kesavan via szetszwo)
+
+    HADOOP-5836. Bug in S3N handling of directory markers using an object with
+    a trailing "/" causes jobs to fail. (Ian Nowland via tomwhite)
+
+    HADOOP-5861. s3n files are not getting split by default. (tomwhite)
+
+    HADOOP-5762. Fix a problem that DistCp does not copy empty directory.
+    (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5859. Fix "wait() or sleep() with locks held" findbugs warnings in
+    DFSClient.  (Kan Zhang via szetszwo)
+   
+    HADOOP-5457. Fix to continue to run builds even if contrib test fails
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5963. Remove an unnecessary exception catch in NNBench.  (Boris
+    Shkolnik via szetszwo)
+
+    HADOOP-5989. Fix streaming test failure.  (gkesavan)
+
+    HADOOP-5981. Fix a bug in HADOOP-2838 in parsing mapred.child.env.
+    (Amar Kamat via sharad)
+
+    HADOOP-5420. Fix LinuxTaskController to kill tasks using the process
+    groups they are launched with.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6031. Remove @author tags from Java source files.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5980. Fix LinuxTaskController so tasks get passed 
+    LD_LIBRARY_PATH and other environment variables.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4041. IsolationRunner does not work as documented.
+    (Philip Zeyliger via tomwhite)
+
+    HADOOP-6004. Fixes BlockLocation deserialization.  (Jakob Homan via
+    szetszwo)
+
+    HADOOP-6079. Serialize proxySource as DatanodeInfo in DataTransferProtocol.
+    (szetszwo)
+
+    HADOOP-6096. Fix Eclipse project and classpath files following project
+    split. (tomwhite)
+
+    HADOOP-6122. The greater-than operator in test-patch.sh should be "-gt",
+    not ">".  (szetszwo)
+
+    HADOOP-6114. Fix javadoc documentation for FileStatus.getLen.
+    (Dmitry Rzhevskiy via dhruba)
+
+    HADOOP-6131. A sysproperty should not be set unless the property 
+    is set on the ant command line in build.xml (hong tang via mahadev)
+
+    HADOOP-6137. Fix project specific test-patch requirements
+    (Giridharan Kesavan)
+
+    HADOOP-6138. Eliminate the deprecated warnings introduced by H-5438.
+    (He Yongqiang via szetszwo)
+
+    HADOOP-6132. RPC client create an extra connection because of incorrect
+    key for connection cache. (Kan Zhang via rangadi)
+
+    HADOOP-6123. Add missing classpaths in hadoop-config.sh.  (Sharad Agarwal
+    via szetszwo)
+
+    HADOOP-6172. Fix jar file names in hadoop-config.sh and include 
+    ${build.src} as a part of the source list in build.xml.  (Hong Tang via 
+    szetszwo)
+
+    HADOOP-6124. Fix javac warning detection in test-patch.sh.  (Giridharan
+    Kesavan via szetszwo)
+
+    HADOOP-6177. FSInputChecker.getPos() would return position greater 
+    than the file size. (Hong Tang via hairong)
+
+    HADOOP-6188. TestTrash uses java.io.File api but not hadoop FileSystem api.
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-6192. Fix Shell.getUlimitMemoryCommand to not rely on Map-Reduce
+    specific configs. (acmurthy) 
+
+    HADOOP-6103. Clones the classloader as part of Configuration clone.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-6152. Fix classpath variables in bin/hadoop-config.sh and some
+    other scripts.  (Aaron Kimball via szetszwo)
+
+    HADOOP-6215. Fix GenericOptionsParser to deal with -D options with '=' in
+    the value. (Amar Kamat via sharad)
+
+    HADOOP-6227. Fix Configuration to allow final parameters to be set to null
+    and prevent them from being overridden.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6199. Move io.map.skip.index property to core-default from mapred.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-6229. Attempt to make a directory under an existing file on
+    LocalFileSystem should throw an Exception. (Boris Shkolnik via tomwhite)
+
+    HADOOP-6243. Fix a NullPointerException in processing deprecated keys.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6009. S3N listStatus incorrectly returns null instead of empty
+    array when called on empty root. (Ian Nowland via tomwhite)
+
+    HADOOP-6181.  Fix .eclipse.templates/.classpath for avro and jets3t jar
+    files.  (Carlos Valiente via szetszwo)
+
+    HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the
+    header would cause the reader to read the sync marker as a record. (Jay
+    Booth via cdouglas)
+
+    HADOOP-6250. Modify test-patch to delete copied XML files before running
+    patch build. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-6257. Two TestFileSystem classes are confusing
+    hadoop-hdfs-hdfwithmr. (Philip Zeyliger via tomwhite)
+
+    HADOOP-6151. Added an input filter to all of the http servlets that quotes
+    html characters in the parameters, to prevent cross site scripting 
+    attacks. (omalley)
+
+    HADOOP-6274. Fix TestLocalFSFileContextMainOperations test failure.
+    (Gary Murry via suresh).
+
+    HADOOP-6281. Avoid null pointer exceptions when the jsps don't have
+    parameters (omalley)
+
+    HADOOP-6285. Fix the result type of the getParameterMap method in the
+    HttpServer.QuotingInputFilter. (omalley)
+
+    HADOOP-6286. Fix bugs in related to URI handling in glob methods in 
+    FileContext. (Boris Shkolnik via suresh)
+
+    HADOOP-6292. Update native libraries guide. (Corinne Chandel via cdouglas)
+
+    HADOOP-6327. FileContext tests should not use /tmp and should clean up
+    files.  (Sanjay Radia via szetszwo)
+
+    HADOOP-6318. Upgrade to Avro 1.2.0.  (cutting)
+
+    HADOOP-6334.  Fix GenericOptionsParser to understand URI for -files,
+    -libjars and -archives options and fix Path to support URI with fragment.
+    (Amareshwari Sriramadasu via szetszwo)
+
+    HADOOP-6344. Fix rm and rmr immediately deleting files rather than sending
+    them to trash when a user is over quota. (Jakob Homan via suresh)
+
+    HADOOP-6347. run-test-core-fault-inject runs a test case twice if
+    -Dtestcase is set (cos)
+
+    HADOOP-6375. Sync documentation for FsShell du with its implementation.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6441. Protect web ui from cross site scripting attacks (XSS) on
+    the host http header and using encoded utf-7. (omalley)
+
+    HADOOP-6451. Fix build to run contrib unit tests. (Tom White via cdouglas)
+
+    HADOOP-6374. JUnit tests should never depend on anything in conf.
+    (Anatoli Fomenko via cos)
+
+    HADOOP-6290. Prevent duplicate slf4j-simple jar via Avro's classpath.
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-6293. Fix FsShell -text to work on filesystems other than the
+    default. (cdouglas)
+
+    HADOOP-6341. Fix test-patch.sh for checkTests function. (gkesavan)
+
+    HADOOP-6314. Fix "fs -help" for the "-count" command.  (Ravi Phulari via
+    szetszwo)
+
+    HADOOP-6405. Update Eclipse configuration to match changes to Ivy
+    configuration (Edwin Chan via cos)
+
+    HADOOP-6411. Remove deprecated file src/test/hadoop-site.xml. (cos)
+
+    HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
+    IllegalArgumentException is thrown (cos)
+
+    HADOOP-6254. Slow reads cause s3n to fail with SocketTimeoutException.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-6428. HttpServer sleeps with negative values. (cos)
+
+    HADOOP-6414. Add command line help for -expunge command.
+    (Ravi Phulari via tomwhite)
+
+    HADOOP-6391. Classpath should not be part of command line arguments.
+    (Cristian Ivascu via tomwhite)
+
+    HADOOP-6462. Target "compile" does not exist in contrib/cloud. (tomwhite)
+
+    HADOOP-6402. testConf.xsl is not well-formed XML. (Steve Loughran
+    via tomwhite)
+
+    HADOOP-6489. Fix 3 findbugs warnings. (Erik Steffl via suresh)
+
+    HADOOP-6517. Fix UserGroupInformation so that tokens are saved/retrieved
+    to/from the embedded Subject (Owen O'Malley & Kan Zhang via ddas)
+
+    HADOOP-6538. Sets hadoop.security.authentication to simple by default.
+    (ddas)
+
+    HADOOP-6540. Contrib unit tests have invalid XML for core-site, etc.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6521. User specified umask using deprecated dfs.umask must override
+    server configured using new dfs.umaskmode for backward compatibility.
+    (suresh)
+    
+    HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting)
+
+    HADOOP-6505. Use tr rather than sed to effect literal substitution in the
+    build script. (Allen Wittenauer via cdouglas)
+
+    HADOOP-6548. Replace mortbay imports with commons logging. (cdouglas)
+
+    HADOOP-6560. Handle invalid har:// uri in HarFileSystem.  (szetszwo)
+
+    HADOOP-6549. TestDoAsEffectiveUser should use ip address of the host
+     for superuser ip check(jnp via boryas)
+
+    HADOOP-6570. RPC#stopProxy throws NPE if getProxyEngine(proxy) returns
+    null. (hairong)
+
+    HADOOP-6558. Return null in HarFileSystem.getFileChecksum(..) since no
+    checksum algorithm is implemented.  (szetszwo)
+
+    HADOOP-6572. Makes sure that SASL encryption and the push to the responder
+    queue for the RPC response happen atomically. (Kan Zhang via ddas)
+
+    HADOOP-6545. Changes the Key for the FileSystem cache to be UGI (ddas)
+
+    HADOOP-6609. Fixed deadlock in RPC by replacing shared static 
+    DataOutputBuffer in the UTF8 class with a thread local variable. (omalley)
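+
+    The pattern behind the fix, reduced to a minimal sketch with a plain
+    ByteArrayOutputStream standing in for Hadoop's DataOutputBuffer:
+
+      import java.io.ByteArrayOutputStream;
+
+      /** Illustrative only: replace a shared static buffer with a thread-local one. */
+      public class ThreadLocalBufferSketch {
+        // Before: a single static buffer shared by all threads, which forces
+        // locking around every use and can deadlock against other locks.
+        private static final ThreadLocal<ByteArrayOutputStream> BUF =
+            ThreadLocal.withInitial(ByteArrayOutputStream::new);
+
+        static byte[] encode(String s) {
+          ByteArrayOutputStream out = BUF.get();  // per-thread instance, no locking
+          out.reset();
+          byte[] bytes = s.getBytes();
+          out.write(bytes, 0, bytes.length);
+          return out.toByteArray();
+        }
+
+        public static void main(String[] args) throws InterruptedException {
+          Runnable r = () -> System.out.println(encode("hello").length);
+          Thread t1 = new Thread(r), t2 = new Thread(r);
+          t1.start(); t2.start();
+          t1.join(); t2.join();
+        }
+      }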
+
+    HADOOP-6504. Invalid example in the documentation of
+    org.apache.hadoop.util.Tool. (Benoit Sigoure via tomwhite)
+
+    HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat
+    via tomwhite)
+
+    HADOOP-6593. TextRecordInputStream doesn't close SequenceFile.Reader.
+    (Chase Bradford via tomwhite)
+
+    HADOOP-6175. Incorrect version compilation with es_ES.ISO8859-15 locale
+    on Solaris 10. (Urko Benito via tomwhite)
+
+    HADOOP-6645.  Bugs on listStatus for HarFileSystem (rodrigo via mahadev)
+
+    HADOOP-6645. Re: Bugs on listStatus for HarFileSystem (rodrigo via
+    mahadev)
+
+    HADOOP-6654. Fix code example in WritableComparable javadoc.  (Tom White
+    via szetszwo)
+
+    HADOOP-6640. FileSystem.get() does RPC retries within a static
+    synchronized block. (hairong)
+
+    HADOOP-6691. TestFileSystemCaching sometimes hangs. (hairong)
+
+    HADOOP-6507. Hadoop Common Docs - delete 3 doc files that do not belong
+    under Common. (Corinne Chandel via tomwhite)
+
+    HADOOP-6439. Fixes handling of deprecated keys to follow order in which
+    keys are defined. (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6690. FilterFileSystem correctly handles setTimes call.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6703. Prevent renaming a file, directory or symbolic link to
+    itself. (Eli Collins via suresh)
+
+    HADOOP-6710. Symbolic umask for file creation is not conformant with posix.
+    (suresh)
+    
+    HADOOP-6719. Insert all missing methods in FilterFs.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6724. IPC doesn't properly handle IOEs thrown by socket factory.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6722. NetUtils.connect should check that it hasn't connected a socket
+    to itself. (Todd Lipcon via tomwhite)
+
+    HADOOP-6634. Fix AccessControlList to use short names to verify access 
+    control. (Vinod Kumar Vavilapalli via sharad)
+
+    HADOOP-6709. Re-instate deprecated FileSystem methods that were removed
+    after 0.20. (tomwhite)
+ 
+    HADOOP-6630. hadoop-config.sh fails to get executed if hadoop wrapper
+    scripts are in path. (Allen Wittenauer via tomwhite)
+
+    HADOOP-6742. Add methods from HADOOP-6709 to TestFilterFileSystem.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6727. Remove UnresolvedLinkException from public FileContext APIs.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6631. Fix FileUtil.fullyDelete() to continue deleting other files 
+    despite failure at any level. (Contributed by Ravi Gummadi and 
+    Vinod Kumar Vavilapalli)
+
+    HADOOP-6723.  Unchecked exceptions thrown in IPC Connection should not
+    orphan clients.  (Todd Lipcon via tomwhite)
+
+    HADOOP-6404. Rename the generated artifacts to common instead of core.
+    (tomwhite)
+
+    HADOOP-6461.  Webapps aren't located correctly post-split.
+    (Todd Lipcon and Steve Loughran via tomwhite)
+
+    HADOOP-6826.  Revert FileSystem create method that takes CreateFlags.
+    (tomwhite)
+
+    HADOOP-6800.  Harmonize JAR library versions.  (tomwhite)
+
+    HADOOP-6847. Problem staging 0.21.0 artifacts to Apache Nexus Maven
+    Repository (Giridharan Kesavan via cos)
+
+    HADOOP-6819. [Herriot] Shell command for getting the new exceptions in
+    the logs returning exitcode 1 after executing successfully. (Vinay Thota
+    via cos)
+
+    HADOOP-6839. [Herriot] Implement a functionality for getting the user list
+    for creating proxy users. (Vinay Thota via cos)
+
+    HADOOP-6836. [Herriot]: Generic method for adding/modifying the attributes
+    for new configuration. (Vinay Thota via cos)
+
+    HADOOP-6860. 'compile-fault-inject' should never be called directly.
+    (Konstantin Boudnik)
+
+    HADOOP-6790. Instrumented (Herriot) build uses too wide mask to include
+    aspect files. (Konstantin Boudnik)
+
+    HADOOP-6875. [Herriot] Cleanup of temp. configurations is needed upon
+    restart of a cluster (Vinay Thota via cos)
+
+Release 0.20.3 - Unreleased
+
+  NEW FEATURES
+
+    HADOOP-6637. Benchmark for establishing RPC session. (shv)
+
+  BUG FIXES
+
+    HADOOP-6760. WebServer shouldn't increase port number in case of negative
+    port setting caused by Jetty's race (cos)
+    
+    HADOOP-6881. Make WritableComparator initialize classes when
+    looking for their raw comparator, as classes often register raw
+    comparators in initializers, which are no longer automatically run
+    in Java 6 when a class is referenced. (cutting via omalley)
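+
+    A minimal sketch of why forcing initialization matters (the registry and
+    class names below are illustrative, not the WritableComparator internals):
+    a comparator registered from a static initializer is only visible once the
+    class has actually been initialized.
+
+      import java.util.Map;
+      import java.util.concurrent.ConcurrentHashMap;
+
+      /** Illustrative only: a registry populated from static initializers. */
+      public class RawComparatorRegistrySketch {
+        static final Map<Class<?>, String> REGISTRY = new ConcurrentHashMap<>();
+
+        static class MyKey {
+          static {
+            // Runs only when MyKey is initialized, not when merely referenced.
+            REGISTRY.put(MyKey.class, "MyKey raw comparator");
+          }
+        }
+
+        static String lookup(Class<?> c) throws Exception {
+          String cmp = REGISTRY.get(c);
+          if (cmp == null) {
+            // Force initialization so the static registration runs, then retry.
+            Class.forName(c.getName(), true, c.getClassLoader());
+            cmp = REGISTRY.get(c);
+          }
+          return cmp;
+        }
+
+        public static void main(String[] args) throws Exception {
+          // The class literal alone does not run MyKey's static initializer.
+          System.out.println(REGISTRY.get(MyKey.class));  // null
+          System.out.println(lookup(MyKey.class));        // "MyKey raw comparator"
+        }
+      }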
+
+Release 0.20.2 - 2010-2-16
+
+  NEW FEATURES
+
+    HADOOP-6218. Adds a feature where TFile can be split by Record
+    Sequence number. (Hong Tang and Raghu Angadi via ddas)
+
+  BUG FIXES
+
+    HADOOP-6231. Allow caching of filesystem instances to be disabled on a
+    per-instance basis. (tomwhite)
+
+    HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
+    is used as job InputFormat. (Amareshwari Sriramadasu via dhruba)
+
+    HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
+    count at the start of each block in Hadoop archives. (Ben Slusky, Tom
+    White, and Mahadev Konar via cdouglas)
+
+    HADOOP-6269. Fix threading issue with defaultResource in Configuration.
+    (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
+    server on exceeding maximum response size to free up Java heap. (suresh)
+
+    HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+    GzipCodec. (Aaron Kimball via cdouglas)
+
+    HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
+    hairong via hairong)
+
+  IMPROVEMENTS
+
+    HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
+    via tomwhite)
+
+    HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-1849. Add undocumented configuration parameter for per handler 
+    call queue size in IPC Server. (shv)
+
+Release 0.20.1 - 2009-09-01
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-5726. Remove pre-emption from capacity scheduler code base.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5881. Simplify memory monitoring and scheduling related
+    configuration. (Vinod Kumar Vavilapalli via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-6080. Introduce -skipTrash option to rm and rmr.
+    (Jakob Homan via shv)
+
+    HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-5711. Change Namenode file close log to info. (szetszwo)
+
+    HADOOP-5736. Update the capacity scheduler documentation for features
+    like memory based scheduling, job initialization and removal of pre-emption.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4372. Improves the way history filenames are obtained and manipulated.
+    (Amar Kamat via ddas)
+
+    HADOOP-5897. Add name-node metrics to capture java heap usage.
+    (Suresh Srinivas via shv)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer a concrete class
+    instead of abstract. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5646. Fixes a problem in TestQueueCapacities.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
+    Das via hairong)
+
+    HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
+    (hairong)
+
+    HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
+    (Nicholas) Sze via cdouglas)
+
+    HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat 
+    and -touchz options.  (Ravi Phulari via szetszwo)
+
+    HADOOP-5718. Remove the check for the default queue in capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5719. Remove jobs that failed initialization from the waiting queue
+    in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
+    kills itself if it ever discovers that the port to which jetty is actually
+    bound is invalid (-1). (ddas)
+
+    HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return
+    path value that is returned for the case where the file we want to write
+    is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5636. Prevents a job from going to RUNNING state after it has been
+    KILLED (this used to happen when the SetupTask would come back with a 
+    success after the job has been killed). (Amar Kamat via ddas)
+
+    HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
+    based scheduling code when jobs get retired. (yhemanth)
+
+    HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
+    MiniMRCluster. (yhemanth)
+
+    HADOOP-4981. Fix capacity scheduler to schedule speculative tasks 
+    correctly in the presence of High RAM jobs.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5210. Solves a problem in the progress report of the reduce task.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5850. Fixes a problem to do with not being able to run jobs with
+    0 maps/reduces. (Vinod K V via ddas)
+
+    HADOOP-4626. Correct the API links in hdfs forrest doc so that they
+    point to the same version of hadoop.  (szetszwo)
+
+    HADOOP-5883. Fixed tasktracker memory monitoring to account for
+    momentary spurts in memory usage due to java's fork() model.
+    (yhemanth)
+
+    HADOOP-5539. Fixes a problem to do with not preserving intermediate
+    output compression for merged data.
+    (Jothi Padmanabhan and Billy Pearson via ddas)
+
+    HADOOP-5932. Fixes a problem in capacity scheduler in computing
+    available memory on a tasktracker.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5908. Fixes a problem to do with ArithmeticException in the 
+    JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
+
+    HADOOP-5924. Fixes a corner case problem to do with job recovery with
+    empty history files. Also, after a JT restart, sends KillTaskAction to 
+    tasks that report back but the corresponding job hasn't been initialized
+    yet. (Amar Kamat via ddas)
+
+    HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5746. Fixes a corner case problem in Streaming, where if an exception
+    happens in MROutputThread after the last call to the map/reduce method, the 
+    exception goes undetected. (Amar Kamat via ddas)
+
+    HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
+    take more slots. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5937. Correct a safemode message in FSNamesystem.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
+    causing TestQueueCapacities to fail.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never used
+    to come up due to a system file creation on JobTracker's system-dir failing. 
+    This problem would sometimes show up only when the FS for the system-dir 
+    (usually HDFS) is started at nearly the same time as the JobTracker. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5920. Fixes a testcase failure for TestJobHistory. 
+    (Amar Kamat via ddas)
+
+    HADOOP-6139. Fix the FsShell help messages for rm and rmr.  (Jakob Homan
+    via szetszwo)
+
+    HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
+    (Jakob Homan via szetszwo)
+
+    HADOOP-6150. Users should be able to instantiate comparator using TFile
+    API. (Hong Tang via rangadi)
+
+Release 0.20.0 - 2009-04-15
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4210. Fix findbugs warnings for equals implementations of mapred ID
+    classes. Removed public, static ID::read and ID::forName; made ID an
+    abstract class. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4253. Fix various warnings generated by findbugs. 
+    Following deprecated methods in RawLocalFileSystem are removed:
+  	  public String getName()
+  	  public void lock(Path p, boolean shared)
+  	  public void release(Path p) 
+    (Suresh Srinivas via johan)
+
+    HADOOP-4618. Move http server from FSNamesystem into NameNode.
+    FSNamesystem.getNameNodeInfoPort() is removed.
+    FSNamesystem.getDFSNameNodeMachine() and FSNamesystem.getDFSNameNodePort()
+      replaced by FSNamesystem.getDFSNameNodeAddress().
+    NameNode(bindAddress, conf) is removed.
+    (shv)
+
+    HADOOP-4567. GetFileBlockLocations returns the NetworkTopology
+    information of the machines where the blocks reside. (dhruba)
+
+    HADOOP-4435. The JobTracker WebUI displays the amount of heap memory 
+    in use. (dhruba)
+
+    HADOOP-4628. Move Hive into a standalone subproject. (omalley)
+
+    HADOOP-4188. Removes task's dependency on concrete filesystems.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-1650. Upgrade to Jetty 6. (cdouglas)
+
+    HADOOP-3986. Remove static Configuration from JobClient. (Amareshwari
+    Sriramadasu via cdouglas)
+      JobClient::setCommandLineConfig is removed
+      JobClient::getCommandLineConfig is removed
+      JobShell, TestJobShell classes are removed
+
+    HADOOP-4422. S3 file systems should not create bucket.
+    (David Phillips via tomwhite)
+
+    HADOOP-4035. Support memory based scheduling in capacity scheduler.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3497. Fix bug in overly restrictive file globbing with a
+    PathFilter. (tomwhite)
+
+    HADOOP-4445. Replace running task counts with running task
+    percentage in capacity scheduler UI. (Sreekanth Ramakrishnan via
+    yhemanth)
+
+    HADOOP-4631. Splits the configuration into three parts - one for core,
+    one for mapred and the last one for HDFS. (Sharad Agarwal via cdouglas)
+
+    HADOOP-3344. Fix libhdfs build to use autoconf and build the same
+    architecture (32 vs 64 bit) of the JVM running Ant.  The libraries for
+    pipes, utils, and libhdfs are now all in c++/<os_osarch_jvmdatamodel>/lib. 
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-4874. Remove LZO codec because of licensing issues. (omalley)
+
+    HADOOP-4970. The full path name of a file is preserved inside Trash.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4103. NameNode keeps a count of missing blocks. It warns on 
+    WebUI if there are such blocks. '-report' and '-metaSave' have extra
+    info to track such blocks. (Raghu Angadi)
+
+    HADOOP-4783. Change permissions on history files on the jobtracker
+    to be only group readable instead of world readable.
+    (Amareshwari Sriramadasu via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
+    Includes client authentication via user certificates and config-based
+    access control. (Kan Zhang via cdouglas)
+
+    HADOOP-4661. Add DistCh, a new tool for distributed ch{mod,own,grp}.
+    (szetszwo)
+
+    HADOOP-4709. Add several new features and bug fixes to Chukwa.
+      Added Hadoop Infrastructure Care Center (UI for visualizing data
+        collected by Chukwa)
+      Added FileAdaptor for streaming small file in one chunk
+      Added compression to archive and demux output
+      Added unit tests and validation for agent, collector, and demux map 
+        reduce job
+      Added database loader for loading demux output (sequence file) to jdbc 
+        connected database
+      Added algorithm to distribute collector load more evenly
+    (Jerome Boulon, Eric Yang, Andy Konwinski, Ariel Rabkin via cdouglas)
+
+    HADOOP-4179. Add Vaidya tool to analyze map/reduce job logs for performance
+    problems. (Suhas Gogate via omalley)
+
+    HADOOP-4029. Add NameNode storage information to the dfshealth page and
+    move DataNode information to a separated page. (Boris Shkolnik via
+    szetszwo)
+
+    HADOOP-4348. Add service-level authorization for Hadoop. (acmurthy) 
+
+    HADOOP-4826. Introduce admin command saveNamespace. (shv)
+
+    HADOOP-3063. BloomMapFile - fail-fast version of MapFile for sparsely
+    populated key space (Andrzej Bialecki via stack)
+
+    HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally,
+    the old code should work without problem. The new api is in 
+    org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred
+    are deprecated. Differences in the new API:
+      1. All of the methods take Context objects that allow us to add new
+         methods without breaking compatibility.
+      2. Mapper and Reducer now have a "run" method that is called once and
+         contains the control loop for the task, which lets applications
+         replace it.
+      3. Mapper and Reducer by default are Identity Mapper and Reducer.
+      4. The FileOutputFormats use part-r-00000 for the output of reduce 0 and
+         part-m-00000 for the output of map 0.
+      5. The reduce grouping comparator now uses the raw compare instead of 
+         object compare.
+      6. The number of maps in FileInputFormat is controlled by min and max
+         split size rather than min size and the desired number of maps.
+      (omalley)
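+
+    A small example of the shape of the new API (a word-count style Mapper
+    written against org.apache.hadoop.mapreduce, shown for illustration):
+
+      import java.io.IOException;
+      import java.util.StringTokenizer;
+
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.LongWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapreduce.Mapper;
+
+      /** Mapper using the new org.apache.hadoop.mapreduce API. */
+      public class TokenCounterMapper
+          extends Mapper<LongWritable, Text, Text, IntWritable> {
+
+        private static final IntWritable ONE = new IntWritable(1);
+        private final Text word = new Text();
+
+        @Override
+        protected void map(LongWritable key, Text value, Context context)
+            throws IOException, InterruptedException {
+          // The Context carries output collection and job configuration, so
+          // new methods can be added without breaking compatibility.
+          StringTokenizer it = new StringTokenizer(value.toString());
+          while (it.hasMoreTokens()) {
+            word.set(it.nextToken());
+            context.write(word, ONE);
+          }
+        }
+      }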
+    
+    HADOOP-3305.  Use Ivy to manage dependencies.  (Giridharan Kesavan
+    and Steve Loughran via cutting)
+
+  IMPROVEMENTS
+
+    HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via 
+    zshao)
+
+    HADOOP-4234. Fix KFS "glue" layer to allow applications to interface
+    with multiple KFS metaservers. (Sriram Rao via lohit)
+
+    HADOOP-4245. Update to latest version of KFS "glue" library jar. 
+    (Sriram Rao via lohit)
+
+    HADOOP-4244. Change test-patch.sh to check the Eclipse classpath no matter
+    whether it is run by Hudson or not. (szetszwo)
+
+    HADOOP-3180. Add name of missing class to WritableName.getClass 
+    IOException. (Pete Wyckoff via omalley)
+
+    HADOOP-4178. Make the capacity scheduler's default values configurable.
+    (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4262. Generate better error message when client exception has null
+    message. (stevel via omalley)
+
+    HADOOP-4226. Refactor and document LineReader to make it more readily
+    understandable. (Yuri Pradkin via cdouglas)
+    
+    HADOOP-4238. When listing jobs, if scheduling information isn't available 
+    print NA instead of empty output. (Sreekanth Ramakrishnan via johan)
+
+    HADOOP-4284. Support filters that apply to all requests, or global filters,
+    to HttpServer. (Kan Zhang via cdouglas)
+    
+    HADOOP-4276. Improve the hashing functions and deserialization of the 
+    mapred ID classes. (omalley)
+
+    HADOOP-4485. Add a compile-native ant task, as a shorthand. (enis)
+
+    HADOOP-4454. Allow # comments in slaves file. (Rama Ramasamy via omalley)
+
+    HADOOP-3461. Remove hdfs.StringBytesWritable. (szetszwo)
+
+    HADOOP-4437. Use Halton sequence instead of java.util.Random in 
+    PiEstimator. (szetszwo)
+
+    HADOOP-4572. Change INode and its sub-classes to package private. 
+    (szetszwo)
+
+    HADOOP-4187. Does a runtime lookup for JobConf/JobConfigurable, and if 
+    found, invokes the appropriate configure method. (Sharad Agarwal via ddas)
+
+    HADOOP-4453. Improve ssl configuration and handling in HsftpFileSystem,
+    particularly when used with DistCp. (Kan Zhang via cdouglas)
+
+    HADOOP-4583. Several code optimizations in HDFS.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-3923. Remove org.apache.hadoop.mapred.StatusHttpServer.  (szetszwo)
+    
+    HADOOP-4622. Explicitly specify interpretor for non-native
+    pipes binaries. (Fredrik Hedberg via johan)
+    
+    HADOOP-4505. Add a unit test to test faulty setup task and cleanup
+    task killing the job. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4608. Don't print a stack trace when the example driver gets an
+    unknown program to run. (Edward Yoon via omalley)
+
+    HADOOP-4645. Package HdfsProxy contrib project without the extra level
+    of directories. (Kan Zhang via omalley)
+
+    HADOOP-4126. Allow access to HDFS web UI on EC2 (tomwhite via omalley)
+
+    HADOOP-4612. Removes RunJar's dependency on JobClient.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4185. Adds setVerifyChecksum() method to FileSystem.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4523. Prevent too many tasks scheduled on a node from bringing
+    it down by monitoring for cumulative memory usage across tasks.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4640. Adds an input format that can split lzo compressed
+    text files. (johan)
+    
+    HADOOP-4666. Launch reduces only after a few maps have run in the 
+    Fair Scheduler. (Matei Zaharia via johan)    
+
+    HADOOP-4339. Remove redundant calls from FileSystem/FsShell when
+    generating/processing ContentSummary. (David Phillips via cdouglas)
+
+    HADOOP-2774. Add counters tracking records spilled to disk in MapTask and
+    ReduceTask. (Ravi Gummadi via cdouglas)
+
+    HADOOP-4513. Initialize jobs asynchronously in the capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4649. Improve abstraction for spill indices. (cdouglas)
+
+    HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. (Runping
+    Qi via cdouglas)
+
+    HADOOP-4708. Add support for dfsadmin commands in TestCLI. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4758. Add a splitter for metrics contexts to support more than one
+    type of collector. (cdouglas)
+
+    HADOOP-4722. Add tests for dfsadmin quota error messages. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4690.  fuse-dfs - create source file/function + utils + config +
+    main source files. (pete wyckoff via mahadev)
+
+    HADOOP-3750. Fix and enforce module dependencies. (Sharad Agarwal via
+    tomwhite)
+
+    HADOOP-4747. Speed up FsShell::ls by removing redundant calls to the
+    filesystem. (David Phillips via cdouglas)
+
+    HADOOP-4305. Improves the blacklisting strategy, whereby, tasktrackers
+    that are blacklisted are not given tasks to run from other jobs, subject
+    to the following conditions (all must be met):
+    1) The TaskTracker has been blacklisted by at least 4 jobs (configurable)
+    2) The TaskTracker has been blacklisted 50% more number of times than
+       the average (configurable)
+    3) The cluster has less than 50% trackers blacklisted
+    Once in 24 hours, a TaskTracker blacklisted for all jobs is given a chance.
+    Restarting the TaskTracker moves it out of the blacklist.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4688. Modify the MiniMRDFSSort unit test to spill multiple times,
+    exercising the map-side merge code. (cdouglas)
+
+    HADOOP-4737. Adds the KILLED notification when jobs get killed.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4728. Add a test exercising different namenode configurations.
+    (Boris Shkolnik via cdouglas)
+
+    HADOOP-4807. Adds JobClient commands to get the active/blacklisted tracker
+    names. Also adds commands to display running/completed task attempt IDs. 
+    (ddas)
+
+    HADOOP-4699. Remove checksum validation from map output servlet. (cdouglas)
+
+    HADOOP-4838. Added a registry to automate metrics and mbeans management.
+    (Sanjay Radia via acmurthy) 
+
+    HADOOP-3136. Fixed the default scheduler to assign multiple tasks to each 
+    tasktracker per heartbeat, when feasible. To ensure locality isn't hurt 
+    too badly, the scheduler will not assign more than one off-switch task per
+    heartbeat. The heartbeat interval is also halved since the task-tracker is 
+    fixed to no longer send out heartbeats on each task completion. A 
+    slow-start for scheduling reduces is introduced to ensure that reduces 
+    aren't started till sufficient number of maps are done, else reduces of 
+    jobs whose maps aren't scheduled might swamp the cluster.
+    Configuration changes to mapred-default.xml:
+      add mapred.reduce.slowstart.completed.maps 
+    (acmurthy)
+
+    HADOOP-4545. Add example and test case of secondary sort for the reduce.
+    (omalley)
+
+    HADOOP-4753. Refactor gridmix2 to reduce code duplication. (cdouglas)
+
+    HADOOP-4909. Fix Javadoc and make some of the API more consistent in their
+    use of the JobContext instead of Configuration. (omalley)
+
+    HADOOP-4920.  Stop storing Forrest output in Subversion. (cutting)
+
+    HADOOP-4948. Add parameters java5.home and forrest.home to the ant commands
+    in test-patch.sh.  (Giridharan Kesavan via szetszwo)
+
+    HADOOP-4830. Add end-to-end test cases for testing queue capacities.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4980. Improve code layout of capacity scheduler to make it 
+    easier to fix some blocker bugs. (Vivek Ratan via yhemanth)
+
+    HADOOP-4916. Make user/location of Chukwa installation configurable by an
+    external properties file. (Eric Yang via cdouglas)
+
+    HADOOP-4950. Make the CompressorStream, DecompressorStream, 
+    BlockCompressorStream, and BlockDecompressorStream public to facilitate 
+    non-Hadoop codecs. (omalley)
+
+    HADOOP-4843. Collect job history and configuration in Chukwa. (Eric Yang
+    via cdouglas)
+
+    HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric
+    Yang via cdouglas)
+    
+    HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4939. Adds a test that would inject random failures for tasks in 
+    large jobs and would also inject TaskTracker failures. (ddas)
+
+    HADOOP-4944. A configuration file can include other configuration
+    files. (Rama Ramasamy via dhruba)
+
+    HADOOP-4804. Provide Forrest documentation for the Fair Scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5248. A testcase that checks for the existence of job directory
+    after the job completes. Fails if it exists. (ddas)
+
+    HADOOP-4664. Introduces multiple job initialization threads, where the 
+    number of threads is configurable via mapred.jobinit.threads.
+    (Matei Zaharia and Jothi Padmanabhan via ddas)
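+
+    For illustration, a minimal sketch of raising the thread count in
+    mapred-site.xml (the value 4 is an arbitrary example, not a stated
+    default):
+
+      <property>
+        <name>mapred.jobinit.threads</name>
+        <value>4</value>
+      </property>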
+
+    HADOOP-4191. Adds a testcase for JobHistory. (Ravi Gummadi via ddas)
+
+    HADOOP-5466. Change documentation CSS style for headers and code. (Corinne
+    Chandel via szetszwo)
+
+    HADOOP-5275. Add ivy directory and files to built tar.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5468. Add sub-menus to forrest documentation and make some minor
+    edits.  (Corinne Chandel via szetszwo)
+
+    HADOOP-5437. Fix TestMiniMRDFSSort to properly test jvm-reuse. (omalley)
+
+    HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT 
+    JobHistory tag. (Ravi Gummadi via ddas)
+
+  OPTIMIZATIONS
+
+    HADOOP-3293. Fixes FileInputFormat to provide locations for splits
+    based on the rack/host that has the most number of bytes.
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4683. Fixes Reduce shuffle scheduler to invoke
+    getMapCompletionEvents in a separate thread. (Jothi Padmanabhan
+    via ddas)
+
+  BUG FIXES
+
+    HADOOP-4204. Fix findbugs warnings related to unused variables, naive
+    Number subclass instantiation, Map iteration, and badly scoped inner
+    classes. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4207. Update derby jar file to release 10.4.2 release.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4325. SocketInputStream.read() should return -1 in case of EOF.
+    (Raghu Angadi)
+
+    HADOOP-4408. FsAction functions need not create new objects. (cdouglas)
+
+    HADOOP-4440.  TestJobInProgressListener tests for jobs killed in queued 
+    state. (Amar Kamat via ddas)
+
+    HADOOP-4346. Implement blocking connect so that Hadoop is not affected
+    by selector problem with JDK default implementation. (Raghu Angadi)
+
+    HADOOP-4388. If there are invalid blocks in the transfer list, Datanode
+    should handle them and keep transferring the remaining blocks.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4587. Fix a typo in Mapper javadoc.  (Koji Noguchi via szetszwo)
+
+    HADOOP-4530. In fsck, HttpServletResponse sendError fails with
+    IllegalStateException. (hairong)
+
+    HADOOP-4377. Fix a race condition in directory creation in
+    NativeS3FileSystem. (David Phillips via cdouglas)
+
+    HADOOP-4621. Fix javadoc warnings caused by duplicate jars. (Kan Zhang via
+    cdouglas)
+
+    HADOOP-4566. Deploy new hive code to support more types.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4571. Add chukwa conf files to svn:ignore list. (Eric Yang via
+    szetszwo)
+
+    HADOOP-4589. Correct PiEstimator output messages and improve the code
+    readability. (szetszwo)
+
+    HADOOP-4650. Correct a mismatch between the default value of
+    local.cache.size in the config and the source. (Jeff Hammerbacher via
+    cdouglas)
+
+    HADOOP-4606. Fix cygpath error if the log directory does not exist.
+    (szetszwo via omalley)
+
+    HADOOP-4141. Fix bug in ScriptBasedMapping causing potential infinite
+    loop on misconfigured hadoop-site. (Aaron Kimball via tomwhite)
+
+    HADOOP-4691. Correct a link in the javadoc of IndexedSortable. (szetszwo)
+
+    HADOOP-4598. '-setrep' command skips under-replicated blocks. (hairong)
+
+    HADOOP-4429. Set defaults for user, group in UnixUserGroupInformation so
+    login fails more predictably when misconfigured. (Alex Loddengaard via
+    cdouglas)
+
+    HADOOP-4676. Fix broken URL in blacklisted tasktrackers page. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-3422. Ganglia counter metrics are all reported with the metric
+    name "value", so the counter values cannot be seen. (Jason Attributor
+    and Brian Bockelman via stack)
+
+    HADOOP-4704. Fix javadoc typos "the the". (szetszwo)
+
+    HADOOP-4677. Fix semantics of FileSystem::getBlockLocations to return
+    meaningful values. (Hong Tang via cdouglas)
+
+    HADOOP-4669. Use correct operator when evaluating whether access time is
+    enabled (Dhruba Borthakur via cdouglas)
+
+    HADOOP-4732. Pass connection and read timeouts in the correct order when
+    setting up fetch in reduce. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4558. Fix capacity reclamation in capacity scheduler.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4770. Fix rungridmix_2 script to work with RunJar. (cdouglas)
+
+    HADOOP-4738. When using git, the saveVersion script will use only the
+    commit hash for the version and not the message, which requires escaping.
+    (cdouglas)
+
+    HADOOP-4576. Show pending job count instead of task count in the UI per
+    queue in capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4623. Maintain running tasks even if speculative execution is off.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4786. Fix broken compilation error in 
+    TestTrackerBlacklistAcrossJobs. (yhemanth)
+
+    HADOOP-4785. Fixes the JobTracker heartbeat to not make two calls to 
+    System.currentTimeMillis(). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4792. Add generated Chukwa configuration files to version control
+    ignore lists. (cdouglas)
+
+    HADOOP-4796. Fix Chukwa test configuration, remove unused components. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4708. Add binaries missed in the initial checkin for Chukwa. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4805. Remove black list collector from Chukwa Agent HTTP Sender.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4837. Move HADOOP_CONF_DIR configuration to chukwa-env.sh (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-4825. Use ps instead of jps for querying process status in Chukwa.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4844. Fixed javadoc for
+    org.apache.hadoop.fs.permission.AccessControlException to document that
+    it's deprecated in favour of
+    org.apache.hadoop.security.AccessControlException. (acmurthy) 
+
+    HADOOP-4706. Close the underlying output stream in
+    IFileOutputStream::close. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4855. Fixed command-specific help messages for refreshServiceAcl in
+    DFSAdmin and MRAdmin. (acmurthy)
+
+    HADOOP-4820. Remove unused method FSNamesystem::deleteInSafeMode. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4698. Lower io.sort.mb to 10 in the tests and raise the junit memory
+    limit to 512m from 256m. (Nigel Daley via cdouglas)
+
+    HADOOP-4860. Split TestFileTailingAdapters into three separate tests to
+    avoid contention. (Eric Yang via cdouglas)
+
+    HADOOP-3921. Fixed clover (code coverage) target to work with JDK 6.
+    (tomwhite via nigel)
+
+    HADOOP-4845. Modify the reduce input byte counter to record only the
+    compressed size and add a human-readable label. (Yongqiang He via cdouglas)
+
+    HADOOP-4458. Add a test creating symlinks in the working directory.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4879. Fix org.apache.hadoop.mapred.Counters to correctly define
+    Object.equals rather than depend on contentEquals api. (omalley via 
+    acmurthy)
+
+    HADOOP-4791. Fix rpm build process for Chukwa. (Eric Yang via cdouglas)
+
+    HADOOP-4771. Correct initialization of the file count for directories 
+    with quotas. (Ruyue Ma via shv)
+
+    HADOOP-4878. Fix eclipse plugin classpath file to point to ivy's resolved
+    lib directory and added the same to test-patch.sh. (Giridharan Kesavan via
+    acmurthy)
+
+    HADOOP-4774. Fix default values of some capacity scheduler configuration
+    items which would otherwise not work on a fresh checkout.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4876. Fix capacity scheduler reclamation by updating count of
+    pending tasks correctly. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4849. Documentation for Service Level Authorization implemented in
+    HADOOP-4348. (acmurthy)
+
+    HADOOP-4827. Replace Consolidator with Aggregator macros in Chukwa (Eric
+    Yang via cdouglas)
+
+    HADOOP-4894. Correctly parse ps output in Chukwa jettyCollector.sh. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-4892. Close fds out of Chukwa ExecPlugin. (Ari Rabkin via cdouglas)
+
+    HADOOP-4889. Fix permissions in RPM packaging. (Eric Yang via cdouglas)
+
+    HADOOP-4869. Fixes the TT-JT heartbeat to have an explicit flag for 
+    restart, apart from the initialContact flag that existed earlier.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4716. Fixes ReduceTask.java to clear out the mapping between
+    hosts and MapOutputLocation upon a JT restart (Amar Kamat via ddas)
+
+    HADOOP-4880. Removes an unnecessary testcase from TestJobTrackerRestart.
+    (Amar Kamat via ddas)
+
+    HADOOP-4924. Fixes a race condition in TaskTracker re-init. (ddas)
+
+    HADOOP-4854. Read reclaim capacity interval from capacity scheduler 
+    configuration. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4896. HDFS Fsck does not load HDFS configuration. (Raghu Angadi)
+
+    HADOOP-4956. Creates TaskStatus for failed tasks with an empty Counters 
+    object instead of null. (ddas)
+
+    HADOOP-4979. Fix capacity scheduler to block cluster for failed high
+    RAM requirements across task types. (Vivek Ratan via yhemanth)
+
+    HADOOP-4949. Fix native compilation. (Chris Douglas via acmurthy) 
+
+    HADOOP-4787. Fixes the testcase TestTrackerBlacklistAcrossJobs which was
+    earlier failing randomly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4914. Add description fields to Chukwa init.d scripts (Eric Yang via
+    cdouglas)
+
+    HADOOP-4884. Make tool tip date format match standard HICC format. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4925. Make Chukwa sender properties configurable. (Ari Rabkin via
+    cdouglas)
+
+    HADOOP-4947. Make Chukwa command parsing more forgiving of whitespace. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5026. Make chukwa/bin scripts executable in repository. (Andy
+    Konwinski via cdouglas)
+
+    HADOOP-4977. Fix a deadlock between the reclaimCapacity and assignTasks
+    in capacity scheduler. (Vivek Ratan via yhemanth)
+
+    HADOOP-4988. Fix reclaim capacity to work even when there are queues with
+    no capacity. (Vivek Ratan via yhemanth)
+
+    HADOOP-5065. Remove generic parameters from argument to 
+    setIn/OutputFormatClass so that it works with SequenceIn/OutputFormat.
+    (cdouglas via omalley)
+
+    HADOOP-4818. Pass user config to instrumentation API. (Eric Yang via
+    cdouglas)
+
+    HADOOP-4993. Fix Chukwa agent configuration and startup to make it both
+    more modular and testable. (Ari Rabkin via cdouglas)
+
+    HADOOP-5048. Fix capacity scheduler to correctly cleanup jobs that are
+    killed after initialization, but before running. 
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4671. Mark loop control variables shared between threads as
+    volatile. (cdouglas)
+
+    HADOOP-5079. HashFunction inadvertently destroys some randomness
+    (Jonathan Ellis via stack)
+
+    HADOOP-4999. A failure to write to FsEditsLog results in 
+    IndexOutOfBounds exception. (Boris Shkolnik via rangadi)
+
+    HADOOP-5139. Catch IllegalArgumentException during metrics registration 
+    in RPC.  (Hairong Kuang via szetszwo)
+
+    HADOOP-5085. Copying a file to local with Crc throws an exception.
+    (hairong)
+
+    HADOOP-5211. Fix check for job completion in TestSetupAndCleanupFailure.
+    (enis)
+
+    HADOOP-5254. The Configuration class should be able to work with XML
+    parsers that do not support xmlinclude. (Steve Loughran via dhruba)
+
+    HADOOP-4692. Namenode in infinite loop for replicating/deleting corrupt
+    blocks. (hairong)
+
+    HADOOP-5255. Fix use of Math.abs to avoid overflow. (Jonathan Ellis via
+    cdouglas)
+
+    HADOOP-5269. Fixes a problem to do with tasktracker holding on to 
+    FAILED_UNCLEAN or KILLED_UNCLEAN tasks forever. (Amareshwari Sriramadasu
+    via ddas) 
+
+    HADOOP-5214. Fixes a ConcurrentModificationException while the Fairshare
+    Scheduler accesses the tasktrackers stored by the JobTracker.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5233. Addresses the three issues - Race condition in updating
+    status, NPE in TaskTracker task localization when the conf file is missing
+    (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task 
+    (HADOOP-5235). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5247. Introduces a broadcast of KillJobAction to all trackers when
+    a job finishes. This fixes several NPE problems that occur when a 
+    completed job is not in memory and a tasktracker comes to the jobtracker 
+    with a status report of a task belonging to that job. (Amar Kamat via ddas)
+
+    HADOOP-5282. Fixed job history logs for task attempts that are
+    failed by the JobTracker, say due to lost task trackers. (Amar
+    Kamat via yhemanth)
+
+    HADOOP-5241. Fixes a bug in disk-space resource estimation. Makes
+    the estimation formula linear where blowUp =
+    Total-Output/Total-Input. (Sharad Agarwal via ddas)
+
+    HADOOP-5142. Fix MapWritable#putAll to store key/value classes. 
+    (Doğacan Güney via enis)
+
+    HADOOP-4744. Workaround for jetty6 returning -1 when getLocalPort
+    is invoked on the connector. The workaround patch retries a few
+    times before failing.  (Jothi Padmanabhan via yhemanth)
+
+    HADOOP-5280. Adds a check to prevent a task state transition from
+    FAILED to any of UNASSIGNED, RUNNING, COMMIT_PENDING or
+    SUCCEEDED. (ddas)
+
+    HADOOP-5272. Fixes a problem to do with detecting whether an
+    attempt is the first attempt of a Task. This affects JobTracker
+    restart. (Amar Kamat via ddas)
+
+    HADOOP-5306. Fixes a problem to do with logging/parsing the http port of a 
+    lost tracker. Affects JobTracker restart. (Amar Kamat via ddas)
+
+    HADOOP-5111. Fix Job::set* methods to work with generics. (cdouglas)
+
+    HADOOP-5274. Fix gridmix2 dependency on wordcount example. (cdouglas)
+
+    HADOOP-5145. Balancer sometimes runs out of memory after running
+    days or weeks.  (hairong)
+
+    HADOOP-5338. Fix jobtracker restart to clear task completion
+    events cached by tasktrackers forcing them to fetch all events
+    afresh, thus avoiding missed task completion events on the
+    tasktrackers. (Amar Kamat via yhemanth)
+
+    HADOOP-4695. Change TestGlobalFilter so that it allows a web page to be
+    filtered more than once for a single access.  (Kan Zhang via szetszwo) 
+
+    HADOOP-5298. Change TestServletFilter so that it allows a web page to be
+    filtered more than once for a single access.  (szetszwo) 
+
+    HADOOP-5432. Disable ssl during unit tests in hdfsproxy, as it is unused
+    and causes failures. (cdouglas)
+
+    HADOOP-5416. Correct the shell command "fs -test" forrest doc description.
+    (Ravi Phulari via szetszwo) 
+
+    HADOOP-5327. Fixed job tracker to remove files from system directory on
+    ACL check failures and also check ACLs on restart.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-5395. Change the exception message when a job is submitted to an
+    invalid queue. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5276. Fixes a problem to do with updating the start time of
+    a task when the tracker that ran the task is lost. (Amar Kamat via
+    ddas)
+
+    HADOOP-5278. Fixes a problem to do with logging the finish time of
+    a task during recovery (after a JobTracker restart). (Amar Kamat
+    via ddas)
+
+    HADOOP-5490. Fixes a synchronization problem in the
+    EagerTaskInitializationListener class. (Jothi Padmanabhan via
+    ddas)
+
+    HADOOP-5493. The shuffle copier threads return the codecs back to
+    the pool when the shuffle completes. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5414. Fixes IO exception while executing hadoop fs -touchz
+    fileName by making sure that lease renewal thread exits before dfs
+    client exits.  (hairong)
+
+    HADOOP-5103. FileInputFormat now reuses the clusterMap network
+    topology object, which significantly reduces the NetworkTopology.add
+    log messages in the JobClient. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5483. Fixes a problem in the Directory Cleanup Thread due to which
+    TestMiniMRWithDFS sometimes used to fail. (ddas) 
+
+    HADOOP-5281. Prevent sharing incompatible ZlibCompressor instances between
+    GzipCodec and DefaultCodec. (cdouglas)
+
+    HADOOP-5463. Balancer throws "Not a host:port pair" unless port is
+    specified in fs.default.name. (Stuart White via hairong)
+
+    HADOOP-5514. Fix JobTracker metrics and add metrics for waiting, failed
+    tasks. (cdouglas)
+
+    HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread
+    that comes when monitored processes disappear when the thread is
+    running.  (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5382. Support combiners in the new context object API. (omalley)
+
+    HADOOP-5471. Fixes a problem to do with updating the log.index file in the 
+    case where a cleanup task is run. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5328. Fixes a problem in the renaming of job history files during 
+    job recovery. (Amar Kamat via ddas)
+
+    HADOOP-5417. Don't ignore InterruptedExceptions that happen when calling 
+    into rpc. (omalley)
+
+    HADOOP-5320. Add a close() in TestMapReduceLocal.  (Jothi Padmanabhan
+    via szetszwo)
+
+    HADOOP-5520. Fix a typo in disk quota help message.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5519. Remove claims from mapred-default.xml that prime numbers
+    of tasks are helpful.  (Owen O'Malley via szetszwo)
+
+    HADOOP-5484. TestRecoveryManager fails wtih FileAlreadyExistsException.
+    (Amar Kamat via hairong)
+
+    HADOOP-5564. Limit the JVM heap size in the java command for initializing
+    JAVA_PLATFORM.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5565. Add API for failing/finalized jobs to the JT metrics
+    instrumentation. (Jerome Boulon via cdouglas)
+
+    HADOOP-5390. Remove duplicate jars from tarball, src from binary tarball
+    added by hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5066. Building binary tarball should not build docs/javadocs, copy
+    src, or run jdiff. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-5459. Fix undetected CRC errors where intermediate output is closed
+    before it has been completely consumed. (cdouglas)
+
+    HADOOP-5571. Remove widening primitive conversion in TupleWritable mask
+    manipulation. (Jingkei Ly via cdouglas)
+
+    HADOOP-5588. Remove an unnecessary call to listStatus(..) in
+    FileSystem.globStatusInternal(..).  (Hairong Kuang via szetszwo)
+
+    HADOOP-5473. Solves a race condition in killing a task - the state is KILLED
+    if there is a user request pending to kill the task and the TT reported
+    the state as SUCCESS. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5576. Fix LocalRunner to work with the new context object API in
+    mapreduce. (Tom White via omalley)
+
+    HADOOP-4374. Installs a shutdown hook in the Task JVM so that log.index is
+    updated before the JVM exits. Also makes the update to log.index atomic.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5577. Add a verbose flag to mapreduce.Job.waitForCompletion to get
+    the running job's information printed to the user's stdout as it runs.
+    (omalley)
+
+    HADOOP-5607. Fix NPE in TestCapacityScheduler. (cdouglas)
+
+    HADOOP-5605. All the replicas incorrectly got marked as corrupt. (hairong)
+
+    HADOOP-5337. JobTracker, upon restart, now waits for the TaskTrackers to
+    join back before scheduling new tasks. This fixes race conditions associated
+    with greedy scheduling as was the case earlier. (Amar Kamat via ddas) 
+
+    HADOOP-5227. Fix distcp so -update and -delete can be meaningfully
+    combined. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5305. Increase number of files and print debug messages in
+    TestCopyFiles.  (szetszwo)
+
+    HADOOP-5548. Add synchronization for JobTracker methods in RecoveryManager.
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-3810. NameNode seems unstable on a cluster with little space left.
+    (hairong)
+
+    HADOOP-5068. Fix NPE in TestCapacityScheduler.  (Vinod Kumar Vavilapalli
+    via szetszwo)
+
+    HADOOP-5585. Clear FileSystem statistics between tasks when jvm-reuse
+    is enabled. (omalley)
+
+    HADOOP-5394. JobTracker might schedule 2 attempts of the same task 
+    with the same attempt id across restarts. (Amar Kamat via sharad)
+
+    HADOOP-5645. After HADOOP-4920 we need a place to checkin
+    releasenotes.html. (nigel)
+
+Release 0.19.2 - 2009-06-30
+
+  BUG FIXES
+
+    HADOOP-5154. Fixes a deadlock in the fairshare scheduler. 
+    (Matei Zaharia via yhemanth)
+   
+    HADOOP-5146. Fixes a race condition that causes LocalDirAllocator to miss
+    files.  (Devaraj Das via yhemanth)
+
+    HADOOP-4638. Fixes job recovery to not crash the job tracker for problems
+    with a single job file. (Amar Kamat via yhemanth)
+
+    HADOOP-5384. Fix a problem that DataNodeCluster creates blocks with
+    generationStamp == 1.  (szetszwo)
+
+    HADOOP-5376. Fixes the code handling lost tasktrackers to set the task state
+    to KILLED_UNCLEAN only for the relevant types of tasks.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-5285. Fixes these issues: (1) obtainTaskCleanupTask checks whether the
+    job is initialized before trying to lock the JobInProgress; (2) Moves the
+    CleanupQueue class outside the TaskTracker and makes it a generic class that
+    is used by the JobTracker also for deleting the paths on the job's output fs;
+    (3) Moves the references to completedJobStore outside the block where the
+    JobTracker is locked. (ddas)
+
+    HADOOP-5392. Fixes a problem to do with JT crashing during recovery when
+    the job files are garbled. (Amar Kamat via ddas)
+
+    HADOOP-5332. Appending to files is not allowed (by default) unless
+    dfs.support.append is set to true. (dhruba)
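+
+    For example, a minimal hdfs-site.xml sketch enabling the behaviour
+    described above (illustrative only):
+
+      <property>
+        <name>dfs.support.append</name>
+        <value>true</value>
+      </property>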
+
+    HADOOP-5333. libhdfs supports appending to files. (dhruba)
+
+    HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba)
+
+    HADOOP-5440. Fixes a problem to do with removing a taskId from the list
+    of taskIds that the TaskTracker's TaskMemoryManager manages.
+    (Amareshwari Sriramadasu via ddas)
+ 
+    HADOOP-5446. Restore TaskTracker metrics. (cdouglas)
+
+    HADOOP-5449. Fixes the history cleaner thread. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5479. NameNode should not send empty block replication request to
+    DataNode. (hairong)
+
+    HADOOP-5259. Job with output hdfs:/user/<username>/outputpath (no 
+    authority) fails with Wrong FS. (Doug Cutting via hairong)
+
+    HADOOP-5522. Documents the setup/cleanup tasks in the mapred tutorial.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5549. ReplicationMonitor should schedule both replication and
+    deletion work in one iteration. (hairong)
+
+    HADOOP-5554. DataNodeCluster and CreateEditsLog should create blocks with
+    the same generation stamp value. (hairong via szetszwo)
+
+    HADOOP-5231. Clones the TaskStatus before passing it to the JobInProgress.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4719. Fix documentation of 'ls' format for FsShell. (Ravi Phulari
+    via cdouglas)
+
+    HADOOP-5374. Fixes an NPE problem in the getTasksToSave method.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4780. Cache the size of directories in DistributedCache, avoiding
+    long delays in recalculating it. (He Yongqiang via cdouglas)
+
+    HADOOP-5551. Prevent directory destruction on file create.
+    (Brian Bockelman via shv)
+
+    HADOOP-5671. Fix FNF exceptions when copying from old versions of
+    HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5213. Fix NullPointerException caused when bzip2 compression 
+    was used and a user closed an output stream without writing any data.
+    (Zheng Shao via dhruba)
+
+    HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
+    conditions. (Brian Bockelman via cdouglas)
+
+    HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
+    ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
+
+    HADOOP-5951. Add Apache license header to StorageInfo.java.  (Suresh
+    Srinivas via szetszwo)
+
+Release 0.19.1 - 2009-02-23 
+
+  IMPROVEMENTS
+
+    HADOOP-4739. Fix spelling and grammar, improve phrasing of some sections in
+    mapred tutorial. (Vivek Ratan via cdouglas)
+
+    HADOOP-3894. DFSClient logging improvements. (Steve Loughran via shv)
+
+    HADOOP-5126. Remove empty file BlocksWithLocations.java (shv)
+
+    HADOOP-5127. Remove public methods in FSDirectory. (Jakob Homan via shv)
+
+  BUG FIXES
+
+    HADOOP-4697. Fix getBlockLocations in KosmosFileSystem to handle multiple
+    blocks correctly. (Sriram Rao via cdouglas)
+
+    HADOOP-4420. Add null checks for job, caused by invalid job IDs.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4632. Fix TestJobHistoryVersion to use test.build.dir instead of the
+    current working directory for scratch space. (Amar Kamat via cdouglas)
+
+    HADOOP-4508. Fix FSDataOutputStream.getPos() for append. (dhruba via
+    szetszwo)
+
+    HADOOP-4727. Fix a group checking bug in fill_stat_structure(...) in
+    fuse-dfs.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4836. Correct typos in mapred related documentation.  (Jordà Polo
+    via szetszwo)
+
+    HADOOP-4821. Usage descriptions in the Quotas guide documentation are
+    incorrect. (Boris Shkolnik via hairong)
+
+    HADOOP-4847. Moves the loading of OutputCommitter to the Task.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4966. Marks completed setup tasks for removal. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4982. TestFsck should run in Eclipse. (shv)
+
+    HADOOP-5008. TestReplication#testPendingReplicationRetry leaves an opened
+    fd unclosed. (hairong)
+
+    HADOOP-4906. Fix TaskTracker OOM by keeping a shallow copy of JobConf in
+    TaskTracker.TaskInProgress. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4918. Fix bzip2 compression to work with Sequence Files.
+    (Zheng Shao via dhruba).
+
+    HADOOP-4965. TestFileAppend3 should close FileSystem. (shv)
+
+    HADOOP-4967. Fixes a race condition in the JvmManager to do with killing
+    tasks. (ddas)
+
+    HADOOP-5009. DataNode#shutdown sometimes leaves data block scanner
+    verification log unclosed. (hairong)
+
+    HADOOP-5086. Use the appropriate FileSystem for trash URIs. (cdouglas)
+    
+    HADOOP-4955. Make DBOutputFormat use column names from setOutput().
+    (Kevin Peterson via enis) 
+
+    HADOOP-4862. Minor: HADOOP-3678 did not remove all the cases of 
+    spurious IOExceptions logged by DataNode. (Raghu Angadi) 
+
+    HADOOP-5034. NameNode should send both replication and deletion requests
+    to DataNode in one reply to a heartbeat. (hairong)
+
+    HADOOP-4759. Removes the temporary output directory for failed and killed
+    tasks by launching special CLEANUP tasks for them.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5161. Accepted sockets do not get placed in
+    DataXceiverServer#childSockets. (hairong)
+
+    HADOOP-5193. Correct calculation of edits modification time. (shv)
+
+    HADOOP-4494. Allow libhdfs to append to files.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-5166. Fix JobTracker restart to work when ACLs are configured
+    for the JobTracker. (Amar Kamat via yhemanth).
+
+    HADOOP-5067. Fixes TaskInProgress.java to keep track of count of failed and
+    killed tasks correctly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4760. HDFS streams should not throw exceptions when closed twice. 
+    (enis)
+
+Release 0.19.0 - 2008-11-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3595. Remove deprecated methods for mapred.combine.once 
+    functionality, which were necessary to provide backwards-compatible
+    combiner semantics for 0.18. (cdouglas via omalley)
+
+    HADOOP-3667. Remove the following deprecated methods from JobConf:
+      addInputPath(Path)
+      getInputPaths()
+      getMapOutputCompressionType()
+      getOutputPath()
+      getSystemDir()
+      setInputPath(Path)
+      setMapOutputCompressionType(CompressionType style)
+      setOutputPath(Path)
+    (Amareshwari Sriramadasu via omalley)
+
+    HADOOP-3652. Remove deprecated class OutputFormatBase.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-2885. Break the hadoop.dfs package into separate packages under
+    hadoop.hdfs that reflect whether they are client, server, protocol, 
+    etc. DistributedFileSystem and DFSClient have moved and are now 
+    considered package private. (Sanjay Radia via omalley)
+
+    HADOOP-2325.  Require Java 6. (cutting)
+
+    HADOOP-372.  Add support for multiple input paths with a different
+    InputFormat and Mapper for each path.  (Chris Smith via tomwhite)
+
+    HADOOP-1700.  Support appending to file in HDFS. (dhruba)
+
+    HADOOP-3792. Make FsShell -test consistent with unix semantics, returning
+    zero for true and non-zero for false. (Ben Slusky via cdouglas)
+
+    HADOOP-3664. Remove the deprecated method InputFormat.validateInput,
+    which is no longer needed. (tomwhite via omalley)
+
+    HADOOP-3549. Give more meaningful errno's in libhdfs. In particular, 
+    EACCES is returned for permission problems. (Ben Slusky via omalley)
+
+    HADOOP-4036. ResourceStatus was added to TaskTrackerStatus by HADOOP-3759,
+    so increment the InterTrackerProtocol version. (Hemanth Yamijala via 
+    omalley)
+
+    HADOOP-3150. Moves task promotion to tasks. Defines a new interface for
+    committing output files. Moves job setup to jobclient, and moves jobcleanup
+    to a separate task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3446. Keep map outputs in memory during the reduce. Remove
+    fs.inmemory.size.mb and replace with properties defining in memory map
+    output retention during the shuffle and reduce relative to maximum heap
+    usage. (cdouglas)
+
+    HADOOP-3245. Adds the feature for supporting JobTracker restart. Running
+    jobs can be recovered from the history file. The history file format has
+    been modified to support recovery. The task attempt ID now has the 
+    JobTracker start time to distinguish attempts of the same TIP across 
+    restarts. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-4007. Remove DFSFileInfo - FileStatus is sufficient. 
+    (Sanjay Radia via hairong)
+
+    HADOOP-3722. Fixed Hadoop Streaming and Hadoop Pipes to use the Tool
+    interface and GenericOptionsParser. (Enis Soztutar via acmurthy) 
+
+    HADOOP-2816. Cluster summary at name node web reports the space
+    utilization as:
+    Configured Capacity: capacity of all the data directories - Reserved space
+    Present Capacity: Space available for dfs, i.e. remaining + used space
+    DFS Used%: DFS used space / Present Capacity
+    (Suresh Srinivas via hairong)
+
+    HADOOP-3938. Disk space quotas for HDFS. This is similar to namespace
+    quotas in 0.18. (rangadi)
+
+    HADOOP-4293. Make Configuration Writable and remove unreleased 
+    WritableJobConf. Configuration.write is renamed to writeXml. (omalley)
+
+    HADOOP-4281. Change dfsadmin to report available disk space in a format
+    consistent with the web interface as defined in HADOOP-2816. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4430. Further change the cluster summary at name node web that was
+    changed in HADOOP-2816:
+      Non DFS Used - This indicates the disk space taken by non-DFS files out
+                     of the Configured Capacity
+      DFS Used % - DFS Used % of Configured Capacity 
+      DFS Remaining % - Remaining % of Configured Capacity available for DFS use
+    DFS command line report reflects the same change. Config parameter 
+    dfs.datanode.du.pct is no longer used and is removed from the 
+    hadoop-default.xml. (Suresh Srinivas via hairong)
+
+    HADOOP-4116. Balancer should provide better resource management. (hairong)
+
+    HADOOP-4599. BlocksMap and BlockInfo made package private. (shv)
+
+  NEW FEATURES
+
+    HADOOP-3341. Allow streaming jobs to specify the field separator for map
+    and reduce input and output. The new configuration values are:
+      stream.map.input.field.separator
+      stream.map.output.field.separator
+      stream.reduce.input.field.separator
+      stream.reduce.output.field.separator
+    All of them default to "\t". (Zheng Shao via omalley)
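+
+    As an illustrative sketch only, a streaming job could override one of
+    these separators in its configuration, e.g.:
+
+      <property>
+        <name>stream.map.output.field.separator</name>
+        <value>,</value>
+      </property>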
+
+    HADOOP-3479. Defines the configuration file for the resource manager in 
+    Hadoop. You can configure various parameters related to scheduling, such 
+    as queues and queue properties, here. The properties for a queue follow a
+    naming convention such as hadoop.rm.queue.queue-name.property-name.
+    (Hemanth Yamijala via ddas)
+
+    HADOOP-3149. Adds a way in which map/reduce tasks can create multiple 
+    outputs. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3714.  Add a new contrib, bash-tab-completion, which enables 
+    bash tab completion for the bin/hadoop script. See the README file
+    in the contrib directory for the installation. (Chris Smith via enis)
+
+    HADOOP-3730. Adds a new JobConf constructor that disables loading
+    default configurations. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3772. Add a new Hadoop Instrumentation api for the JobTracker and
+    the TaskTracker, refactor Hadoop Metrics as an implementation of the api.
+    (Ari Rabkin via acmurthy) 
+
+    HADOOP-2302. Provides a comparator for numerical sorting of key fields.
+    (ddas)
+
+    HADOOP-153. Provides a way to skip bad records. (Sharad Agarwal via ddas)
+
+    HADOOP-657. Free disk space should be modelled and used by the scheduler
+    to make scheduling decisions. (Ari Rabkin via omalley)
+
+    HADOOP-3719. Initial checkin of Chukwa, which is a data collection and 
+    analysis framework. (Jerome Boulon, Andy Konwinski, Ari Rabkin, 
+    and Eric Yang)
+
+    HADOOP-3873. Add -filelimit and -sizelimit options to distcp to cap the
+    number of files/bytes copied in a particular run to support incremental
+    updates and mirroring. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3585. FailMon package for hardware failure monitoring and 
+    analysis of anomalies. (Ioannis Koltsidas via dhruba)
+
+    HADOOP-1480. Add counters to the C++ Pipes API. (acmurthy via omalley)
+
+    HADOOP-3854. Add support for pluggable servlet filters in the HttpServers.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3759. Provides ability to run memory intensive jobs without 
+    affecting other running tasks on the nodes. (Hemanth Yamijala via ddas)
+
+    HADOOP-3746. Add a fair share scheduler. (Matei Zaharia via omalley)
+
+    HADOOP-3754. Add a thrift interface to access HDFS. (dhruba via omalley)
+
+    HADOOP-3828. Provides a way to write skipped records to DFS.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3948. Separate name-node edits and fsimage directories.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3939. Add an option to DistCp to delete files at the destination
+    not present at the source. (Tsz Wo (Nicholas) Sze via cdouglas)
+
+    HADOOP-3601. Add a new contrib module for Hive, which is a sql-like
+    query processing tool that uses map/reduce. (Ashish Thusoo via omalley)
+
+    HADOOP-3866. Added sort and multi-job updates in the JobTracker web ui.
+    (Craig Weisenfluh via omalley)
+
+    HADOOP-3698. Add access control to control who is allowed to submit or 
+    modify jobs in the JobTracker. (Hemanth Yamijala via omalley)
+
+    HADOOP-1869. Support access times for HDFS files. (dhruba)
+
+    HADOOP-3941. Extend FileSystem API to return file-checksums.
+    (szetszwo)
+
+    HADOOP-3581. Prevents memory intensive user tasks from taking down 
+    nodes. (Vinod K V via ddas)
+
+    HADOOP-3970. Provides a way to recover counters written to JobHistory.
+    (Amar Kamat via ddas)
+
+    HADOOP-3702. Adds ChainMapper and ChainReducer classes that allow composing
+    chains of Maps and Reduces in a single Map/Reduce job, something like 
+    MAP+ / REDUCE MAP*. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3445. Add capacity scheduler that provides guaranteed capacities to 
+    queues as a percentage of the cluster. (Vivek Ratan via omalley)
+
+    HADOOP-3992. Add a synthetic load generation facility to the test
+    directory. (hairong via szetszwo)
+
+    HADOOP-3981. Implement a distributed file checksum algorithm in HDFS
+    and change DistCp to use file checksum for comparing src and dst files
+    (szetszwo)
+
+    HADOOP-3829. Narrow down skipped records based on a user-acceptable value.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3930. Add common interfaces for the pluggable schedulers and the
+    cli & gui clients. (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4176. Implement getFileChecksum(Path) in HftpFileSystem. (szetszwo)
+
+    HADOOP-249. Reuse JVMs across Map-Reduce Tasks. 
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.reuse.jvm.num.tasks
+    (Devaraj Das via acmurthy) 
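+
+    For example, a minimal sketch of enabling unlimited JVM reuse per job in
+    hadoop-site.xml (-1 means reuse without limit; 1 restores the old
+    one-task-per-JVM behaviour):
+
+      <property>
+        <name>mapred.job.reuse.jvm.num.tasks</name>
+        <value>-1</value>
+      </property>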
+
+    HADOOP-4070. Provide a mechanism in Hive for registering UDFs from the
+    query language. (tomwhite)
+
+    HADOOP-2536. Implement a JDBC based database input and output formats to
+    allow Map-Reduce applications to work with databases. (Fredrik Hedberg and
+    Enis Soztutar via acmurthy)
+
+    HADOOP-3019. A new library to support total order partitions.
+    (cdouglas via omalley)
+
+    HADOOP-3924. Added a 'KILLED' job status. (Subramaniam Krishnan via
+    acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-4205. hive: metastore and ql to use the refactored SerDe library.
+    (zshao)
+
+    HADOOP-4106. libhdfs: add time, permission and user attribute support 
+    (part 2). (Pete Wyckoff through zshao)
+
+    HADOOP-4104. libhdfs: add time, permission and user attribute support.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3908. libhdfs: better error message if libhdfs.so doesn't exist.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3732. Delay initialization of datanode block verification till
+    the verification thread is started. (rangadi)
+
+    HADOOP-1627. Various small improvements to 'dfsadmin -report' output.
+    (rangadi)
+
+    HADOOP-3577. Tools to inject blocks into name node and simulated
+    data nodes for testing. (Sanjay Radia via hairong)
+
+    HADOOP-2664. Add a lzop compatible codec, so that files compressed by lzop
+    may be processed by map/reduce. (cdouglas via omalley)
+
+    HADOOP-3655. Add additional ant properties to control junit. (Steve 
+    Loughran via omalley)
+
+    HADOOP-3543. Update the copyright year to 2008. (cdouglas via omalley)
+
+    HADOOP-3587. Add a unit test for the contrib/data_join framework.
+    (cdouglas)
+
+    HADOOP-3402. Add terasort example program (omalley)
+
+    HADOOP-3660. Add replication factor for injecting blocks in simulated
+    datanodes. (Sanjay Radia via cdouglas)
+
+    HADOOP-3684. Add a cloning function to the contrib/data_join framework
+    permitting users to define a more efficient method for cloning values from
+    the reduce than serialization/deserialization. (Runping Qi via cdouglas)
+
+    HADOOP-3478. Improves the handling of map output fetching. Now the
+    randomization is by the hosts (and not the map outputs themselves). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-3617. Removed redundant checks of accounting space in MapTask and
+    made the spill thread persistent so as to avoid creating a new one for
+    each spill. (Chris Douglas via acmurthy)  
+
+    HADOOP-3412. Factor the scheduler out of the JobTracker and make
+    it pluggable. (Tom White and Brice Arnould via omalley)
+
+    HADOOP-3756. Minor. Remove unused dfs.client.buffer.dir from 
+    hadoop-default.xml. (rangadi)
+
+    HADOOP-3747. Adds counter support for MultipleOutputs. 
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-3169. LeaseChecker daemon should not be started in DFSClient
+    constructor. (TszWo (Nicholas), SZE via hairong)
+
+    HADOOP-3824. Move base functionality of StatusHttpServer to a core
+    package. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data
+    may be processed by map/reduce. (Abdul Qadeer via cdouglas)
+
+    HADOOP-3861. MapFile.Reader and Writer should implement Closeable.
+    (tomwhite via omalley)
+
+    HADOOP-3791. Introduce generics into ReflectionUtils. (Chris Smith via
+    cdouglas)
+
+    HADOOP-3694. Improve unit test performance by changing
+    MiniDFSCluster to listen only on 127.0.0.1.  (cutting)
+
+    HADOOP-3620. Namenode should synchronously resolve a datanode's network
+    location when the datanode registers. (hairong)
+
+    HADOOP-3860. NNThroughputBenchmark is extended with rename and delete 
+    benchmarks. (shv)
+    
+    HADOOP-3892. Include unix group name in JobConf. (Matei Zaharia via johan)
+
+    HADOOP-3875. Change the time period between heartbeats to be relative to
+    the end of the heartbeat rpc, rather than the start. This causes better
+    behavior if the JobTracker is overloaded. (acmurthy via omalley)
+
+    HADOOP-3853. Move multiple input format (HADOOP-372) extension to 
+    library package. (tomwhite via johan)
+
+    HADOOP-9. Use roulette scheduling for temporary space when the size
+    is not known. (Ari Rabkin via omalley)
+
+    HADOOP-3202. Use recursive delete rather than FileUtil.fullyDelete.
+    (Amareshwari Sriramadasu via omalley)
+
+    HADOOP-3368. Remove common-logging.properties from conf. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3851. Fix spelling mistake in FSNamesystemMetrics. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3780. Remove asynchronous resolution of network topology in the 
+    JobTracker (Amar Kamat via omalley)
+
+    HADOOP-3852. Add ShellCommandExecutor.toString method to make nicer
+    error messages. (Steve Loughran via omalley)
+
+    HADOOP-3844. Include message of local exception in RPC client failures.
+    (Steve Loughran via omalley)
+
+    HADOOP-3935. Split out inner classes from DataNode.java. (johan)
+
+    HADOOP-3905. Create generic interfaces for edit log streams. (shv)
+
+    HADOOP-3062. Add metrics to DataNode and TaskTracker to record network
+    traffic for HDFS reads/writes and MR shuffling. (cdouglas)
+
+    HADOOP-3742. Remove HDFS from public java doc and add javadoc-dev for
+    generative javadoc for developers. (Sanjay Radia via omalley)
+
+    HADOOP-3944. Improve documentation for public TupleWritable class in 
+    join package. (Chris Douglas via enis)
+
+    HADOOP-2330. Preallocate HDFS transaction log to improve performance.
+    (dhruba and hairong)
+
+    HADOOP-3965. Convert DataBlockScanner into a package private class. (shv)
+
+    HADOOP-3488. Prevent hadoop-daemon from rsync'ing log files (Stefan 
+    Groshupf and Craig Macdonald via omalley)
+
+    HADOOP-3342. Change the kill task actions to require http post instead of 
+    get to prevent accidental crawls from triggering it. (enis via omalley)
+
+    HADOOP-3937. Limit the job name in the job history filename to 50 
+    characters. (Matei Zaharia via omalley)
+
+    HADOOP-3943. Remove unnecessary synchronization in 
+    NetworkTopology.pseudoSortByDistance. (hairong via omalley)
+
+    HADOOP-3498. File globbing alternation should be able to span path
+    components. (tomwhite)
+
+    HADOOP-3361. Implement renames for NativeS3FileSystem.
+    (Albert Chern via tomwhite)
+
+    HADOOP-3605. Make EC2 scripts show an error message if AWS_ACCOUNT_ID is
+    unset. (Al Hoang via tomwhite)
+
+    HADOOP-4147. Remove unused class JobWithTaskContext from class
+    JobInProgress. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4151. Add a byte-comparable interface that both Text and 
+    BytesWritable implement. (cdouglas via omalley)
+
+    HADOOP-4174. Move fs image/edit log methods from ClientProtocol to
+    NamenodeProtocol. (shv via szetszwo)
+
+    HADOOP-4181. Include a .gitignore and saveVersion.sh change to support
+    developing under git. (omalley)
+
+    HADOOP-4186. Factor LineReader out of LineRecordReader. (tomwhite via
+    omalley)
+
+    HADOOP-4184. Break the module dependencies between core, hdfs, and 
+    mapred. (tomwhite via omalley)
+
+    HADOOP-4075. test-patch.sh now spits out ant commands that it runs.
+    (Ramya R via nigel)
+
+    HADOOP-4117. Improve configurability of Hadoop EC2 instances.
+    (tomwhite)
+
+    HADOOP-2411. Add support for larger CPU EC2 instance types.
+    (Chris K Wensel via tomwhite)
+
+    HADOOP-4083. Changed the configuration attribute queue.name to
+    mapred.job.queue.name. (Hemanth Yamijala via acmurthy) 
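+
+    For example, a job submission could select a queue with the renamed
+    property ("research" below is a hypothetical queue name):
+
+      <property>
+        <name>mapred.job.queue.name</name>
+        <value>research</value>
+      </property>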
+
+    HADOOP-4194. Added the JobConf and JobID to job-related methods in
+    JobTrackerInstrumentation for better metrics. (Mac Yang via acmurthy) 
+
+    HADOOP-3975. Change test-patch script to report working dir
+    modifications preventing the suite from being run. (Ramya R via cdouglas)
+
+    HADOOP-4124. Added a command-line switch to allow users to set job
+    priorities, also allow it to be manipulated via the web-ui. (Hemanth
+    Yamijala via acmurthy) 
+
+    HADOOP-2165. Augmented JobHistory to include the URIs to the tasks'
+    userlogs. (Vinod Kumar Vavilapalli via acmurthy) 
+
+    HADOOP-4062. Remove the synchronization on the output stream when a
+    connection is closed and also remove an undesirable exception when
+    a client is stopped while there is no pending RPC request. (hairong)
+
+    HADOOP-4227. Remove the deprecated class org.apache.hadoop.fs.ShellCommand.
+    (szetszwo)
+
+    HADOOP-4006. Clean up FSConstants and move some of the constants to
+    better places. (Sanjay Radia via rangadi)
+
+    HADOOP-4279. Trace the seeds of random sequences in append unit tests to
+    make intermittent failures reproducible. (szetszwo via cdouglas)
+
+    HADOOP-4209. Remove the change to the format of task attempt id by 
+    incrementing the task attempt numbers by 1000 when the job restarts.
+    (Amar Kamat via omalley)
+
+    HADOOP-4301. Adds forrest doc for the skip bad records feature.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4354. Separate TestDatanodeDeath.testDatanodeDeath() into 4 tests.
+    (szetszwo)
+
+    HADOOP-3790. Add more unit tests for testing HDFS file append.  (szetszwo)
+
+    HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth 
+    Yamijala via omalley)
+
+    HADOOP-4424. Change menu layout for Hadoop documentation (Boris Shkolnik
+    via cdouglas).
+
+    HADOOP-4438. Update forrest documentation to include missing FsShell
+    commands. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4105.  Add forrest documentation for libhdfs.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-4510. Make getTaskOutputPath public. (Chris Wensel via omalley)
+
+  OPTIMIZATIONS
+
+    HADOOP-3556. Removed lock contention in MD5Hash by replacing the 
+    singleton MessageDigester with an instance per Thread using 
+    ThreadLocal. (Iván de Prado via omalley)
+
+    HADOOP-3328. When client is writing data to DFS, only the last 
+    datanode in the pipeline needs to verify the checksum. Saves around
+    30% CPU on intermediate datanodes. (rangadi)
+
+    HADOOP-3863. Use a thread-local string encoder rather than a static one
+    that is protected by a lock. (acmurthy via omalley)
+
+    HADOOP-3864. Prevent the JobTracker from locking up when a job is being
+    initialized. (acmurthy via omalley)
+
+    HADOOP-3816. Faster directory listing in KFS. (Sriram Rao via omalley)
+
+    HADOOP-2130. Pipes submit job should have both blocking and non-blocking
+    versions. (acmurthy via omalley)
+
+    HADOOP-3769. Make the SampleMapper and SampleReducer from
+    GenericMRLoadGenerator public, so they can be used in other contexts. 
+    (Lingyun Yang via omalley)
+
+    HADOOP-3514. Inline the CRCs in intermediate files as opposed to reading
+    them from a different .crc file. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3638. Caches the iFile index files in memory to reduce seeks.
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4225. FSEditLog.logOpenFile() should persist accessTime 
+    rather than modificationTime. (shv)
+
+    HADOOP-4380. Made several new classes (Child, JVMId, 
+    JobTrackerInstrumentation, QueueManager, ResourceEstimator, 
+    TaskTrackerInstrumentation, and TaskTrackerMetricsInst) in 
+    org.apache.hadoop.mapred  package private instead of public. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3563.  Refactor the distributed upgrade code so that it is 
+    easier to identify datanode and namenode related code. (dhruba)
+
+    HADOOP-3640. Fix the read method in the NativeS3InputStream. (tomwhite via
+    omalley)
+
+    HADOOP-3711. Fixes the Streaming input parsing to properly find the 
+    separator. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3725. Prevent TestMiniMRMapDebugScript from swallowing exceptions.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3726. Throw exceptions from TestCLI setup and teardown instead of
+    swallowing them. (Steve Loughran via cdouglas)
+
+    HADOOP-3721. Refactor CompositeRecordReader and related mapred.join classes
+    to make them clearer. (cdouglas)
+
+    HADOOP-3720. Re-read the config file when dfsadmin -refreshNodes is invoked
+    so dfs.hosts and dfs.hosts.exclude are observed. (lohit vijayarenu via
+    cdouglas)
+
+    HADOOP-3485. Allow writing to files over fuse.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3723. The flags to the libhdfs.create call can be treated as
+    a bitmask. (Pete Wyckoff via dhruba)
+
+    HADOOP-3643. Filter out completed tasks when asking for running tasks in
+    the JobTracker web/ui. (Amar Kamat via omalley)
+
+    HADOOP-3777. Ensure that Lzo compressors/decompressors correctly handle the
+    case where native libraries aren't available. (Chris Douglas via acmurthy) 
+
+    HADOOP-3728. Fix SleepJob so that it doesn't depend on temporary files,
+    this ensures we can now run more than one instance of SleepJob
+    simultaneously. (Chris Douglas via acmurthy) 
+
+    HADOOP-3795. Fix saving image files on Namenode with different checkpoint
+    stamps. (Lohit Vijayarenu via mahadev)
+   
+    HADOOP-3624. Improve CreateEditsLog to create a tree directory structure.
+    (Lohit Vijayarenu via mahadev)
+
+    HADOOP-3778. DFSInputStream.seek() did not retry in case of some errors.
+    (Luo Ning via rangadi)
+
+    HADOOP-3661. The handling of moving files deleted through fuse-dfs to
+    Trash is made similar to the behaviour of the dfs shell.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3819. Unset LANG and LC_CTYPE in saveVersion.sh to make it
+    compatible with non-English locales. (Rong-En Fan via cdouglas)
+
+    HADOOP-3848. Cache calls to getSystemDir in the TaskTracker instead of
+    calling it for each task start. (acmurthy via omalley)
+
+    HADOOP-3131. Fix reduce progress reporting for compressed intermediate
+    data. (Matei Zaharia via acmurthy) 
+
+    HADOOP-3796. fuse-dfs configuration is implemented as file system
+    mount options. (Pete Wyckoff via dhruba)
+
+    HADOOP-3836. Fix TestMultipleOutputs to correctly clean up. (Alejandro 
+    Abdelnur via acmurthy)
+
+    HADOOP-3805. Improve fuse-dfs write performance.
+    (Pete Wyckoff via zshao)
+
+    HADOOP-3846. Fix unit test CreateEditsLog to generate paths correctly. 
+    (Lohit Vijayarenu via cdouglas)
+    
+    HADOOP-3904. Fix unit tests using the old dfs package name.
+    (TszWo (Nicholas), SZE via johan)
+
+    HADOOP-3319. Fix some HOD error messages to go to stderr instead of
+    stdout. (Vinod Kumar Vavilapalli via omalley)
+
+    HADOOP-3907. Move INodeDirectoryWithQuota to its own .java file.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3919. Fix attribute name in hadoop-default for 
+    mapred.jobtracker.instrumentation. (Ari Rabkin via omalley)
+
+    HADOOP-3903. Change the package name for the servlets to be hdfs instead of
+    dfs. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3773. Change Pipes to set the default map output key and value 
+    types correctly. (Koji Noguchi via omalley)
+
+    HADOOP-3952. Fix compilation error in TestDataJoin referencing dfs package.
+    (omalley)
+
+    HADOOP-3951. Fix package name for FSNamesystem logs and modify other
+    hard-coded Logs to use the class name. (cdouglas)
+
+    HADOOP-3889. Improve error reporting from HftpFileSystem, handling in
+    DistCp. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3946. Fix TestMapRed after hadoop-3664. (tomwhite via omalley)
+
+    HADOOP-3949. Remove duplicate jars from Chukwa. (Jerome Boulon via omalley)
+
+    HADOOP-3933. DataNode sometimes sends up to io.bytes.per.checksum bytes 
+    more than required to the client. (Ning Li via rangadi)
+
+    HADOOP-3962. Shell command "fs -count" should support paths with different
+    file systems. (Tsz Wo (Nicholas), SZE via mahadev)
+
+    HADOOP-3957. Fix javac warnings in DistCp and TestCopyFiles. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-3958. Fix TestMapRed to check the success of test-job. (omalley via
+    acmurthy)
+
+    HADOOP-3985. Fix TestHDFSServerPorts to use random ports.  (Hairong Kuang 
+    via omalley)
+
+    HADOOP-3964. Fix javadoc warnings introduced by FailMon. (dhruba)
+
+    HADOOP-3785. Fix FileSystem cache to be case-insensitive for scheme and
+    authority. (Bill de hOra via cdouglas)
+
+    HADOOP-3506. Fix a rare NPE caused by error handling in S3. (Tom White via
+    cdouglas)
+
+    HADOOP-3705. Fix mapred.join parser to accept InputFormats named with
+    underscore and static, inner classes. (cdouglas)
+
+    HADOOP-4023. Fix javadoc warnings introduced when the HDFS javadoc was 
+    made private. (omalley)
+
+    HADOOP-4030. Remove lzop from the default list of codecs. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3961. Fix task disk space requirement estimates for virtual
+    input jobs. Delays limiting task placement until after 10% of the maps
+    have finished. (Ari Rabkin via omalley)
+
+    HADOOP-2168. Fix problem with C++ record reader's progress not being
+    reported to framework. (acmurthy via omalley)
+
+    HADOOP-3966. Copy findbugs generated output files to PATCH_DIR while 
+    running test-patch. (Ramya R via lohit)
+
+    HADOOP-4037. Fix the eclipse plugin for versions of kfs and log4j. (nigel
+    via omalley)
+
+    HADOOP-3950. Cause the Mini MR cluster to wait for task trackers to 
+    register before continuing. (enis via omalley)
+
+    HADOOP-3910. Remove unused ClusterTestDFSNamespaceLogging and
+    ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3954. Disable record skipping by default. (Sharad Agarwal via
+    cdouglas)
+
+    HADOOP-4050. Fix TestFairScheduler to use absolute paths for the work
+    directory. (Matei Zaharia via omalley)
+
+    HADOOP-4069. Keep temporary test files from TestKosmosFileSystem under
+    test.build.data instead of /tmp. (lohit via omalley)
+ 
+    HADOOP-4078. Create test files for TestKosmosFileSystem in separate
+    directory under test.build.data. (lohit)
+
+    HADOOP-3968. Fix getFileBlockLocations calls to use FileStatus instead
+    of Path reflecting the new API. (Pete Wyckoff via lohit)
+
+    HADOOP-3963. libhdfs does not exit on its own, instead it returns an error 
+    to the caller and behaves as a true library. (Pete Wyckoff via dhruba)
+
+    HADOOP-4100. Removes the cleanupTask scheduling from the Scheduler 
+    implementations and moves it to the JobTracker. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4097. Make hive work well with speculative execution turned on.
+    (Joydeep Sen Sarma via dhruba)
+
+    HADOOP-4113. Changes to libhdfs to not exit on its own, rather return
+    an error code to the caller. (Pete Wyckoff via dhruba)
+
+    HADOOP-4054. Remove duplicate lease removal during edit log loading.
+    (hairong)
+
+    HADOOP-4071. FSNameSystem.isReplicationInProgress should add an
+    underReplicated block to the neededReplication queue using method 
+    "add" not "update". (hairong)
+
+    HADOOP-4154. Fix type warnings in WritableUtils. (szetszwo via omalley)
+
+    HADOOP-4133. Log files generated by Hive should reside in the 
+    build directory. (Prasad Chakka via dhruba)
+
+    HADOOP-4094. Hive now has hive-default.xml and hive-site.xml similar
+    to core hadoop. (Prasad Chakka via dhruba)
+
+    HADOOP-4112. Handles cleanupTask in JobHistory 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3831. Very slow reading clients sometimes failed while reading.
+    (rangadi)
+
+    HADOOP-4155. Use JobTracker's start time while initializing JobHistory's
+    JobTracker Unique String. (lohit) 
+
+    HADOOP-4099. Fix null pointer when using HFTP from an 0.18 server.
+    (dhruba via omalley)
+
+    HADOOP-3570. Includes user-specified libjar files in the client-side 
+    classpath. (Sharad Agarwal via ddas)
+
+    HADOOP-4129. Changed memory limits of TaskTracker and Tasks to be in
+    KiloBytes rather than bytes. (Vinod Kumar Vavilapalli via acmurthy)
+
+    HADOOP-4139. Optimize Hive multi group-by.
+    (Namin Jain via dhruba)
+
+    HADOOP-3911. Add a check to fsck options to make sure -files is not 
+    the first option to resolve conflicts with GenericOptionsParser
+    (lohit)
+
+    HADOOP-3623. Refactor LeaseManager. (szetszwo)
+
+    HADOOP-4125. Handles Reduce cleanup tip on the web ui.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4087. Hive Metastore API for php and python clients.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo)
+
+    HADOOP-4138. Refactor the Hive SerDe library to better structure
+    the interfaces to the serializer and de-serializer.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4195. Close compressor before returning to codec pool.
+    (acmurthy via omalley)
+
+    HADOOP-2403. Escapes some special characters before logging to 
+    history files. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4200. Fix a bug in the test-patch.sh script.
+    (Ramya R via nigel)
+
+    HADOOP-4084. Add explain plan capabilities to Hive Query Language.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4121. Preserve cause for exception if the initialization of
+    HistoryViewer for JobHistory fails. (Amareshwari Sri Ramadasu via
+    acmurthy) 
+
+    HADOOP-4213. Fixes NPE in TestLimitTasksPerJobTaskScheduler.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4077. Setting access and modification time for a file
+    requires write permissions on the file. (dhruba)
+
+    HADOOP-3592. Fix a couple of possible file leaks in FileUtil
+    (Bill de hOra via rangadi)
+
+    HADOOP-4120. Hive interactive shell records the time taken by a 
+    query.  (Raghotham Murthy via dhruba)
+
+    HADOOP-4090. The hive scripts pick up hadoop from HADOOP_HOME
+    and then the path. (Raghotham Murthy via dhruba)
+
+    HADOOP-4242. Remove extra ";" in FSDirectory that blocks compilation
+    in some IDEs. (szetszwo via omalley)
+
+    HADOOP-4249. Fix eclipse path to include the hsqldb.jar. (szetszwo via
+    omalley)
+
+    HADOOP-4247. Move InputSampler into org.apache.hadoop.mapred.lib, so that
+    examples.jar doesn't depend on tools.jar. (omalley)
+
+    HADOOP-4269. Fix the deprecation of LineReader by extending the new class
+    into the old name and deprecating it. Also update the tests to test the 
+    new class. (cdouglas via omalley)
+
+    HADOOP-4280. Fix conversions between seconds in C and milliseconds in 
+    Java for access times for files. (Pete Wyckoff via rangadi)
+
+    HADOOP-4254. -setSpaceQuota command does not convert "TB" extension to
+    terabytes properly. Implementation now uses StringUtils for parsing this.
+    (Raghu Angadi)
+
+    HADOOP-4259. Findbugs should run over tools.jar also. (cdouglas via 
+    omalley)
+
+    HADOOP-4275. Move public method isJobValidName from JobID to a private
+    method in JobTracker. (omalley)
+
+    HADOOP-4173. Fix failures in TestProcfsBasedProcessTree and
+    TestTaskTrackerMemoryManager tests. ProcfsBasedProcessTree and
+    memory management in TaskTracker are disabled on Windows.
+    (Vinod K V via rangadi)
+
+    HADOOP-4189. Fixes the history blocksize & intertracker protocol version
+    issues introduced as part of HADOOP-3245. (Amar Kamat via ddas)
+
+    HADOOP-4190. Fixes the backward compatibility issue with Job History.
+    introduced by HADOOP-3245 and HADOOP-2403. (Amar Kamat via ddas)
+
+    HADOOP-4237. Fixes the TestStreamingBadRecords.testNarrowDown testcase.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4274. Capacity scheduler accidentally modifies the underlying 
+    data structures when browsing the job lists. (Hemanth Yamijala via omalley)
+
+    HADOOP-4309. Fix eclipse-plugin compilation. (cdouglas)
+
+    HADOOP-4232. Fix race condition in JVM reuse when multiple slots become
+    free. (ddas via acmurthy) 
+
+    HADOOP-4302. Fix a race condition in TestReduceFetch that can yield false
+    negatives. (cdouglas)
+
+    HADOOP-3942. Update distcp documentation to include features introduced in
+    HADOOP-3873, HADOOP-3939. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4319. fuse-dfs dfs_read function returns as many bytes as it is
+    told to read unless end-of-file is reached.  (Pete Wyckoff via dhruba)
+
+    HADOOP-4246. Ensure we have the correct lower bound on the number of
+    retries for fetching map-outputs; also fixed the case where the reducer
+    would automatically kill itself when too many unique map-outputs could not
+    be fetched for small jobs. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-4163. Report FSErrors from map output fetch threads instead of
+    merely logging them. (Sharad Agarwal via cdouglas)
+
+    HADOOP-4261. Adds a setup task for jobs. This is required so that we 
+    don't setup jobs that haven't been inited yet (since init could lead
+    to job failure). Only after the init has successfully happened do we 
+    launch the setupJob task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4256. Removes Completed and Failed Job tables from 
+    jobqueue_details.jsp. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4267. Occasional exceptions during shutting down HSQLDB is logged 
+    but not rethrown. (enis) 
+
+    HADOOP-4018. The number of tasks for a single job cannot exceed a 
+    pre-configured maximum value. (dhruba)
+
+    HADOOP-4288. Fixes a NPE problem in CapacityScheduler. 
+    (Amar Kamat via ddas)
+
+    HADOOP-4014. Create hard links with 'fsutil hardlink' on Windows. (shv)
+
+    HADOOP-4393. Merged org.apache.hadoop.fs.permission.AccessControlException
+    and org.apache.hadoop.security.AccessControlIOException into a single
+    class hadoop.security.AccessControlException. (omalley via acmurthy)
+
+    HADOOP-4287. Fixes an issue to do with maintaining counts of running/pending
+    maps/reduces. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4361. Makes sure that jobs killed from command line are killed
+    fast (i.e., there is a slot to run the cleanup task soon).
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4400. Add "hdfs://" to fs.default.name on quickstart.html.
+    (Jeff Hammerbacher via omalley)
+
+    HADOOP-4378. Fix TestJobQueueInformation to use SleepJob rather than
+    WordCount via TestMiniMRWithDFS. (Sreekanth Ramakrishnan via acmurthy) 
+
+    HADOOP-4376. Fix formatting in hadoop-default.xml for
+    hadoop.http.filter.initializers. (Enis Soztutar via acmurthy) 
+
+    HADOOP-4410. Adds an extra arg to the API FileUtil.makeShellPath to
+    determine whether to canonicalize file paths or not.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4236. Ensure un-initialized jobs are killed correctly on
+    user-demand. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4373. Fix calculation of Guaranteed Capacity for the
+    capacity-scheduler. (Hemanth Yamijala via acmurthy) 
+
+    HADOOP-4053. Schedulers must be notified when jobs complete. (Amar Kamat via omalley)
+
+    HADOOP-4335. Fix FsShell -ls for filesystems without owners/groups. (David
+    Phillips via cdouglas)
+
+    HADOOP-4426. TestCapacityScheduler broke due to the two commits HADOOP-4053
+    and HADOOP-4373. This patch fixes that. (Hemanth Yamijala via ddas)
+
+    HADOOP-4418. Updates documentation in forrest for Mapred, streaming and pipes.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3155. Ensure that there is only one thread fetching 
+    TaskCompletionEvents on TaskTracker re-init. (Dhruba Borthakur via
+    acmurthy) 
+
+    HADOOP-4425. Fix EditLogInputStream to overload the bulk read method.
+    (cdouglas)
+
+    HADOOP-4427. Adds the new queue/job commands to the manual.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4278. Increase debug logging for unit test TestDatanodeDeath.
+    Fix the case when primary is dead.  (dhruba via szetszwo)
+
+    HADOOP-4423. Keep block length when the block recovery is triggered by
+    append.  (szetszwo)
+
+    HADOOP-4449. Fix dfsadmin usage. (Raghu Angadi via cdouglas)
+
+    HADOOP-4455. Added TestSerDe so that unit tests can run successfully.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4457. Fixes an input split logging problem introduced by
+    HADOOP-3245. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4404. saveFSImage() removes files from a storage directory that do 
+    not correspond to its type. (shv)
+
+    HADOOP-4149. Fix handling of updates to the job priority, by changing the
+    list of jobs to be keyed by the priority, submit time, and job tracker id.
+    (Amar Kamat via omalley)
+
+    HADOOP-4296. Fix job client failures by not retiring a job as soon as it
+    is finished. (dhruba)
+
+    HADOOP-4439. Remove configuration variables that aren't usable yet, in
+    particular mapred.tasktracker.tasks.maxmemory and mapred.task.max.memory.
+    (Hemanth Yamijala via omalley)
+
+    HADOOP-4230. Fix for serde2 interface, limit operator, select * operator,
+    UDF trim functions and sampling. (Ashish Thusoo via dhruba)
+
+    HADOOP-4358. No need to truncate access time in INode. Also fixes NPE 
+    in CreateEditsLog. (Raghu Angadi) 
+
+    HADOOP-4387. TestHDFSFileSystemContract fails on windows nightly builds.
+    (Raghu Angadi)
+
+    HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables
+    and can be used with other Serialization frameworks. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4525. Fix ipc.server.ipcnodelay originally missed in HADOOP-2232.
+    (cdouglas via Clint Morgan)
+
+    HADOOP-4498. Ensure that JobHistory correctly escapes the job name so that
+    regex patterns work. (Chris Wensel via acmurthy)
+
+    HADOOP-4446. Modify guaranteed capacity labels in capacity scheduler's UI
+    to reflect the information being displayed. (Sreekanth Ramakrishnan via 
+    yhemanth)
+
+    HADOOP-4282. Some user facing URLs are not filtered by user filters.
+    (szetszwo)
+
+    HADOOP-4595. Fixes two race conditions - one to do with updating free slot count,
+    and another to do with starting the MapEventsFetcher thread. (ddas)
+
+    HADOOP-4552. Fix a deadlock in RPC server. (Raghu Angadi)
+
+    HADOOP-4471. Sort running jobs by priority in the capacity scheduler.
+    (Amar Kamat via yhemanth) 
+
+    HADOOP-4500. Fix MultiFileSplit to get the FileSystem from the relevant
+    path rather than the JobClient. (Joydeep Sen Sarma via cdouglas)
+
+Release 0.18.4 - Unreleased
+
+  BUG FIXES
+
+    HADOOP-5114. Remove timeout for accept() in DataNode. This makes accept() 
+    fail in JDK on Windows and causes many tests to fail. (Raghu Angadi)
+
+    HADOOP-5192. Block receiver should not remove a block that's created or
+    being written by other threads. (hairong)
+ 
+    HADOOP-5134. FSNamesystem#commitBlockSynchronization adds under-construction
+    block locations to blocksMap. (Dhruba Borthakur via hairong)
+
+    HADOOP-5412. Simulated DataNode should not write to a block that's being
+    written by another thread. (hairong)
+
+    HADOOP-5465. Fix the problem of blocks remaining under-replicated by
+    providing synchronized modification to the counter xmitsInProgress in
+    DataNode. (hairong)
+
+    HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
+    (szetszwo)
+
+    HADOOP-5644. Namenode is stuck in safe mode. (Suresh Srinivas via hairong)
+
+    HADOOP-6017. Lease Manager in NameNode does not handle certain characters
+    in filenames. This results in fatal errors in Secondary NameNode and while
+    restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
+
+Release 0.18.3 - 2009-01-27
+
+  IMPROVEMENTS
+
+    HADOOP-4150. Include librecordio in hadoop releases. (Giridharan Kesavan
+    via acmurthy)
+
+    HADOOP-4668. Improve documentation for setCombinerClass to clarify the
+    restrictions on combiners. (omalley)
+
+  BUG FIXES
+
+    HADOOP-4499. DFSClient should invoke checksumOk only once. (Raghu Angadi)
+
+    HADOOP-4597. Calculate mis-replicated blocks when safe-mode is turned
+    off manually. (shv)
+
+    HADOOP-3121. lsr should keep listing the remaining items but not
+    terminate if there is any IOException. (szetszwo)
+
+    HADOOP-4610. Always calculate mis-replicated blocks when safe-mode is 
+    turned off. (shv)
+
+    HADOOP-3883. Limit namenode to assign at most one generation stamp for
+    a particular block within a short period. (szetszwo)
+
+    HADOOP-4556. Block went missing. (hairong)
+
+    HADOOP-4643. NameNode should exclude excessive replicas when counting
+    live replicas for a block. (hairong)
+
+    HADOOP-4703. Should not wait for proxy forever in lease recovering.
+    (szetszwo)
+
+    HADOOP-4647. NamenodeFsck should close the DFSClient it has created.
+    (szetszwo)
+
+    HADOOP-4616. Fuse-dfs can handle bad values from FileSystem.read call.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4061. Throttle Datanode decommission monitoring in Namenode.
+    (szetszwo)
+
+    HADOOP-4659. Root cause of connection failure is being lost to code that
+    uses it for delaying startup. (Steve Loughran and Hairong via hairong)
+
+    HADOOP-4614. Lazily open segments when merging map spills to avoid using
+    too many file descriptors. (Yuri Pradkin via cdouglas)
+
+    HADOOP-4257. The DFS client should pick only one datanode as the candidate
+    to initiate lease recovery.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-4713. Fix librecordio to handle records larger than 64k. (Christian
+    Kunz via cdouglas)
+
+    HADOOP-4635. Fix a memory leak in fuse dfs. (pete wyckoff via mahadev)
+
+    HADOOP-4714. Report status between merges and make the number of records
+    between progress reports configurable. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4726. Fix documentation typos "the the". (Edward J. Yoon via
+    szetszwo)
+
+    HADOOP-4679. Datanode prints tons of log messages: waiting for threadgroup
+    to exit, active threads is XX. (hairong)
+
+    HADOOP-4746. Job output directory should be normalized. (hairong)
+
+    HADOOP-4717. Removal of default port# in NameNode.getUri() causes a
+    map/reduce job failed to prompt temporary output. (hairong)
+
+    HADOOP-4778. Check for zero size block meta file when updating a block.
+    (szetszwo)
+
+    HADOOP-4742. Replica gets deleted by mistake. (Wang Xu via hairong)
+
+    HADOOP-4702. Failed block replication leaves an incomplete block in
+    receiver's tmp data directory. (hairong)
+
+    HADOOP-4613. Fix block browsing on Web UI. (Johan Oskarsson via shv)
+
+    HADOOP-4806. HDFS rename should not use src path as a regular expression.
+    (szetszwo)
+
+    HADOOP-4795. Prevent lease monitor getting into an infinite loop when
+    leases and the namespace tree does not match. (szetszwo)
+
+    HADOOP-4620. Fixes Streaming to handle well the cases of map/reduce with empty
+    input/output. (Ravi Gummadi via ddas)
+
+    HADOOP-4857. Fixes TestUlimit to have exactly 1 map in the jobs spawned.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-4810. Data lost at cluster startup time. (hairong)
+
+    HADOOP-4797. Improve how RPC server reads and writes large buffers. Avoids
+    soft-leak of direct buffers and excess copies in NIO layer. (Raghu Angadi)
+
+    HADOOP-4840. TestNodeCount sometimes fails with NullPointerException.
+    (hairong)
+
+    HADOOP-4904. Fix deadlock while leaving safe mode. (shv)
+
+    HADOOP-1980. 'dfsadmin -safemode enter' should prevent the namenode from
+    leaving safemode automatically. (shv & Raghu Angadi)
+
+    HADOOP-4951. Lease monitor should acquire the LeaseManager lock but not the
+    Monitor lock. (szetszwo)
+
+    HADOOP-4935. processMisReplicatedBlocks() should not clear 
+    excessReplicateMap. (shv)
+
+    HADOOP-4961. Fix ConcurrentModificationException in lease recovery 
+    of empty files. (shv)
+
+    HADOOP-4971. A long (unexpected) delay at datanodes could make subsequent
+    block reports from many datanodes arrive at the same time. (Raghu Angadi)
+    
+    HADOOP-4910. NameNode should exclude replicas when choosing excessive
+    replicas to delete to avoid data loss. (hairong)
+
+    HADOOP-4983. Fixes a problem in updating Counters in the status reporting.
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.18.2 - 2008-11-03
+
+  BUG FIXES
+
+    HADOOP-3614. Fix a bug that Datanode may use an old GenerationStamp to get
+    meta file. (szetszwo)
+
+    HADOOP-4314. Simulated datanodes should not include blocks that are still
+    being written in their block report. (Raghu Angadi)
+
+    HADOOP-4228. dfs datanode metrics, bytes_read and bytes_written, overflow
+    due to incorrect type used. (hairong)
+
+    HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER.
+    (szetszwo)
+
+    HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws
+    ArrayIndexOutOfBoundsException. (hairong)
+
+    HADOOP-4403. Make TestLeaseRecovery and TestFileCreation more robust.
+    (szetszwo)
+
+    HADOOP-4292. Do not support append() for LocalFileSystem. (hairong)
+
+    HADOOP-4399. Make fuse-dfs multi-thread access safe.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4369. Use setMetric(...) instead of incrMetric(...) for metrics
+    averages.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4469. Rename and add the ant task jar file to the tar file. (nigel)
+
+    HADOOP-3914. DFSClient sends Checksum Ok only once for a block. 
+    (Christian Kunz via hairong)
+ 
+    HADOOP-4467. SerializationFactory now uses the current context ClassLoader
+    allowing for user supplied Serialization instances. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4517. Release FSDataset lock before joining ongoing create threads.
+    (szetszwo)
+ 
+    HADOOP-4526. fsck failing with NullPointerException. (hairong)
+
+    HADOOP-4483. Honor the max parameter in DatanodeDescriptor.getBlockArray(..)
+    (Ahad Rana and Hairong Kuang via szetszwo)
+
+    HADOOP-4340. Correctly set the exit code from JobShell.main so that the
+    'hadoop jar' command returns the right code to the user. (acmurthy)
+
+  NEW FEATURES
+
+    HADOOP-2421.  Add jdiff output to documentation, listing all API
+    changes from the prior release.  (cutting)
+
+Release 0.18.1 - 2008-09-17
+
+  IMPROVEMENTS
+
+    HADOOP-3934. Upgrade log4j to 1.2.15. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3995. In case of quota failure on HDFS, rename does not restore
+    source filename. (rangadi)
+
+    HADOOP-3821. Prevent SequenceFile and IFile from duplicating codecs in
+    CodecPool when closed more than once. (Arun Murthy via cdouglas)
+
+    HADOOP-4040. Remove coded default of the IPC idle connection timeout
+    from the TaskTracker, which was causing HDFS client connections to not be 
+    collected. (ddas via omalley)
+
+    HADOOP-4046. Made WritableComparable's constructor protected instead of 
+    private to re-enable class derivation. (cdouglas via omalley)
+
+    HADOOP-3940. Fix in-memory merge condition to wait when there are no map
+    outputs or when the final map outputs are being fetched without contention.
+    (cdouglas)
+
+Release 0.18.0 - 2008-08-19
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2703.  The default options to fsck skip checking files
+    that are being written to. The output of fsck is incompatible
+    with the previous release. (lohit vijayarenu via dhruba) 
+
+    HADOOP-2865. FsShell.ls() printout format changed to print file names
+    in the end of the line. (Edward J. Yoon via shv)
+
+    HADOOP-3283. The Datanode has a RPC server. It currently supports
+    two RPCs: the first RPC retrieves the metadata about a block and the
+    second RPC sets the generation stamp of an existing block.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2797. Code related to upgrading to 0.14 (Block CRCs) is 
+    removed. As a result, upgrade to 0.18 or later from 0.13 or earlier
+    is not supported. If upgrading from 0.13 or earlier is required,
+    please upgrade to an intermediate version (0.14-0.17) and then
+    to this version. (rangadi)
+
+    HADOOP-544. This issue introduces new classes JobID, TaskID and 
+    TaskAttemptID, which should be used instead of their string counterparts.
+    Functions in JobClient, TaskReport, RunningJob, jobcontrol.Job and 
+    TaskCompletionEvent that use string arguments are deprecated in favor 
+    of the corresponding ones that use ID objects. Applications can use 
+    xxxID.toString() and xxxID.forName() methods to convert/restore objects 
+    to/from strings. (Enis Soztutar via ddas)
+
+    HADOOP-2188. RPC client sends a ping rather than throwing timeouts.
+    RPC server does not throw away old RPCs. If clients and the server are on
+    different versions, they are not able to function well. In addition,
+    the property ipc.client.timeout is removed from the default hadoop
+    configuration. It also removes metrics RpcOpsDiscardedOPsNum. (hairong)
+
+    HADOOP-2181. This issue adds logging for input splits in Jobtracker log 
+    and jobHistory log. Also adds web UI for viewing input splits in job UI 
+    and history UI. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3226. Run combiners multiple times over map outputs as they
+    are merged in both the map and the reduce tasks. (cdouglas via omalley)
+
+    HADOOP-3329.  DatanodeDescriptor objects should not be stored in the
+    fsimage. (dhruba)
+
+    HADOOP-2656.  The Block object has a generation stamp inside it.
+    Existing blocks get a generation stamp of 0. This is needed to support
+    appends. (dhruba)
+
+    HADOOP-3390. Removed deprecated ClientProtocol.abandonFileInProgress().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3405. Made some map/reduce internal classes non-public:
+    MapTaskStatus, ReduceTaskStatus, JobSubmissionProtocol, 
+    CompletedJobStatusStore. (enis via omalley)
+
+    HADOOP-3265. Removed deprecated API getFileCacheHints().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3310. The namenode instructs the primary datanode to do lease
+    recovery. The block gets a new  generation stamp.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2909. Improve IPC idle connection management. Property
+    ipc.client.maxidletime is removed from the default configuration,
+    instead it is defined as twice the ipc.client.connection.maxidletime.
+    A connection with outstanding requests won't be treated as idle.
+    (hairong)
+
+    HADOOP-3459. Change in the output format of dfs -ls to more closely match
+    /bin/ls. New format is: perm repl owner group size date name
+    (Mukund Madhugiri via omalley)
+
+    HADOOP-3113. An fsync invoked on a HDFS file really really
+    persists data! The datanode moves blocks in the tmp directory to 
+    the real block directory on a datanode-restart. (dhruba)
+
+    HADOOP-3452. Change fsck to return non-zero status for a corrupt
+    FileSystem. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3193. Include the address of the client that found the corrupted
+    block in the log. Also include a CorruptedBlocks metric to track the size
+    of the corrupted block map. (cdouglas)
+
+    HADOOP-3512. Separate out the tools into a tools jar. (omalley)
+
+    HADOOP-3598. Ensure that temporary task-output directories are not created
+    if they are not necessary e.g. for Maps with no side-effect files.
+    (acmurthy)
+
+    HADOOP-3665. Modify WritableComparator so that it only creates instances
+    of the keytype if the type does not define a WritableComparator. Calling
+    the superclass compare will throw a NullPointerException. Also define
+    a RawComparator for NullWritable and permit it to be written as a key
+    to SequenceFiles. (cdouglas)
+
+    HADOOP-3673. Avoid deadlock caused by DataNode RPC recoverBlock().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+  NEW FEATURES
+
+    HADOOP-3074. Provides a UrlStreamHandler for DFS and other FS,
+    relying on FileSystem (taton)
+
+    HADOOP-2585. Name-node imports namespace data from a recent checkpoint
+    accessible via a NFS mount. (shv)
+
+    HADOOP-3061. Writable types for doubles and bytes. (Andrzej
+    Bialecki via omalley)
+
+    HADOOP-2857. Allow libhdfs to set jvm options. (Craig Macdonald
+    via omalley)
+
+    HADOOP-3317. Add default port for HDFS namenode.  The port in
+    "hdfs:" URIs now defaults to 8020, so that one may simply use URIs
+    of the form "hdfs://example.com/dir/file". (cutting)
+
+    HADOOP-2019. Adds support for .tar, .tgz and .tar.gz files in 
+    DistributedCache (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3058. Add FSNamesystem status metrics. 
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-1915. Allow users to specify counters via strings instead
+    of enumerations. (tomwhite via omalley)
+
+    HADOOP-2065. Delay invalidating corrupt replicas of a block until it 
+    is removed from the under-replicated state. If all replicas are found to 
+    be corrupt, retain all copies and mark the block as corrupt.
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-3221. Adds org.apache.hadoop.mapred.lib.NLineInputFormat, which 
+    splits files into splits each of N lines. N can be specified by 
+    configuration property "mapred.line.input.format.linespermap", which
+    defaults to 1. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3336. Direct a subset of annotated FSNamesystem calls for audit
+    logging. (cdouglas)
+
+    HADOOP-3400. A new API FileSystem.deleteOnExit() that facilitates
+    handling of temporary files in HDFS. (dhruba)
+
+    HADOOP-4.  Add fuse-dfs to contrib, permitting one to mount an
+    HDFS filesystem on systems that support FUSE, e.g., Linux.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-3246. Add FTPFileSystem.  (Ankur Goel via cutting)
+
+    HADOOP-3250. Extend FileSystem API to allow appending to files.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3177. Implement Syncable interface for FileSystem.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1328. Implement user counters in streaming. (tomwhite via
+    omalley)
+
+    HADOOP-3187. Quotas for namespace management. (Hairong Kuang via ddas)
+
+    HADOOP-3307. Support for Archives in Hadoop. (Mahadev Konar via ddas)
+
+    HADOOP-3460. Add SequenceFileAsBinaryOutputFormat to permit direct
+    writes of serialized data. (Koji Noguchi via cdouglas)
+
+    HADOOP-3230. Add ability to get counter values from command
+    line. (tomwhite via omalley)
+
+    HADOOP-930. Add support for native S3 files.  (tomwhite via cutting)
+
+    HADOOP-3502. Quota API needs documentation in Forrest. (hairong)
+
+    HADOOP-3413. Allow SequenceFile.Reader to use serialization
+    framework. (tomwhite via omalley)
+
+    HADOOP-3541. Import of the namespace from a checkpoint documented 
+    in hadoop user guide. (shv)
+
+  IMPROVEMENTS
+
+    HADOOP-3677. Simplify generation stamp upgrade by making it a 
+    local upgrade on datanodes. Deleted distributed upgrade.
+    (rangadi)
+   
+    HADOOP-2928. Remove deprecated FileSystem.getContentLength().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3130. Make the connect timeout smaller for getFile.
+    (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3160. Remove deprecated exists() from ClientProtocol and 
+    FSNamesystem (Lohit Vjayarenu via rangadi)
+
+    HADOOP-2910. Throttle IPC Clients during bursts of requests or
+    server slowdown. Clients retry connection for up to 15 minutes
+    when socket connection times out. (hairong)
+
+    HADOOP-3295. Allow TextOutputFormat to use configurable separators.
+    (Zheng Shao via cdouglas).
+
+    HADOOP-3308. Improve QuickSort by excluding values equal to the pivot from the
+    partition. (cdouglas)
+
+    HADOOP-2461. Trim property names in configuration.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2799. Deprecate o.a.h.io.Closeable in favor of java.io.Closeable.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3345. Enhance the hudson-test-patch target to cleanup messages,
+    fix minor defects, and add eclipse plugin and python unit tests. (nigel)
+
+    HADOOP-3144. Improve robustness of LineRecordReader by defining a maximum
+    line length (mapred.linerecordreader.maxlength), thereby avoiding reading
+    too far into the following split. (Zheng Shao via cdouglas)
+
+    HADOOP-3334. Move lease handling from FSNamesystem into a separate class.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3332. Reduces the amount of logging in Reducer's shuffle phase.
+    (Devaraj Das)
+
+    HADOOP-3355. Enhances Configuration class to accept hex numbers for getInt
+    and getLong. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3350. Add an argument to distcp to permit the user to limit the
+    number of maps. (cdouglas)
+
+    HADOOP-3013. Add corrupt block reporting to fsck.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3377. Remove TaskRunner::replaceAll and replace with equivalent
+    String::replace. (Brice Arnould via cdouglas)
+
+    HADOOP-3398. Minor improvement to a utility function that participates
+    in backoff calculation. (cdouglas)
+
+    HADOOP-3381. Clear references when directories are deleted so that 
+    the effect of memory leaks is not multiplied. (rangadi)
+
+    HADOOP-2867. Adds the task's CWD to its LD_LIBRARY_PATH. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3232. DU class runs the 'du' command in a separate thread so
+    that it does not block user. DataNode misses heartbeats in large
+    nodes otherwise. (Johan Oskarsson via rangadi)
+
+    HADOOP-3035. During block transfers between datanodes, the receiving
+    datanode can now report corrupt replicas received from the source node to
+    the namenode. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3434. Retain the cause of the bind failure in Server::bind.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3429. Increases the size of the buffers used for the communication
+    for Streaming jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3486. Change default for initial block report to 0 seconds
+    and document it. (Sanjay Radia via omalley)
+
+    HADOOP-3448. Improve the text in the assertion making sure the
+    layout versions are consistent in the data node. (Steve Loughran
+    via omalley)
+
+    HADOOP-2095. Improve the Map-Reduce shuffle/merge by cutting down
+    buffer-copies; changed intermediate sort/merge to use the new IFile format
+    rather than SequenceFiles and compression of map-outputs is now
+    implemented by compressing the entire file rather than SequenceFile
+    compression. Shuffle also has been changed to use a simple byte-buffer
+    manager rather than the InMemoryFileSystem. 
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.map.output.compression.type 
+    (acmurthy)
+
+    HADOOP-236. JobTracker now refuses connection from a task tracker with a 
+    different version number. (Sharad Agarwal via ddas)
+
+    HADOOP-3427. Improves the shuffle scheduler. It now waits for notifications
+    from shuffle threads when it has scheduled enough, before scheduling more.
+    (ddas)
+
+    HADOOP-2393. Moves the handling of dir deletions in the tasktracker to
+    a separate thread. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3501. Deprecate InMemoryFileSystem. (cutting via omalley)
+
+    HADOOP-3366. Stall the shuffle while in-memory merge is in progress.
+    (acmurthy) 
+
+    HADOOP-2916. Refactor src structure, but leave package structure alone.
+    (Raghu Angadi via mukund) 
+
+    HADOOP-3492. Add forrest documentation for user archives.
+    (Mahadev Konar via hairong)
+
+    HADOOP-3467. Improve documentation for FileSystem::deleteOnExit.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3379. Documents stream.non.zero.exit.status.is.failure for Streaming.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3096. Improves documentation about the Task Execution Environment in 
+    the Map-Reduce tutorial. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2984. Add forrest documentation for DistCp. (cdouglas)
+
+    HADOOP-3406. Add forrest documentation for Profiling.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2762. Add forrest documentation for controls of memory limits on 
+    hadoop daemons and Map-Reduce tasks. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3535. Fix documentation and name of IOUtils.close to
+    reflect that it should only be used in cleanup contexts. (omalley)
+
+    HADOOP-3593. Updates the mapred tutorial. (ddas)
+
+    HADOOP-3547. Documents the way in which native libraries can be distributed
+    via the DistributedCache. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3606. Updates the Streaming doc. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3532. Add jdiff reports to the build scripts. (omalley)
+
+    HADOOP-3100. Develop tests to test the DFS command line interface. (mukund)
+
+    HADOOP-3688. Fix up HDFS docs. (Robert Chansler via hairong)
+
+  OPTIMIZATIONS
+
+    HADOOP-3274. The default constructor of BytesWritable creates an empty 
+    byte array. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-3272. Remove redundant copy of Block object in BlocksMap.
+    (Lohit Vjayarenu via shv)
+
+    HADOOP-3164. Reduce DataNode CPU usage by using FileChannel.transferTo().
+    On Linux DataNode takes 5 times less CPU while serving data. Results may
+    vary on other platforms. (rangadi)
+
+    HADOOP-3248. Optimization of saveFSImage. (Dhruba via shv)
+
+    HADOOP-3297. Fetch more task completion events from the job
+    tracker and task tracker. (ddas via omalley)
+
+    HADOOP-3364. Faster image and log edits loading. (shv)
+
+    HADOOP-3369. Fast block processing during name-node startup. (shv)
+
+    HADOOP-1702. Reduce buffer copies when data is written to DFS. 
+    DataNodes take 30% less CPU while writing data. (rangadi)
+
+    HADOOP-3095. Speed up split generation in the FileInputSplit,
+    especially for non-HDFS file systems. Deprecates
+    InputFormat.validateInput. (tomwhite via omalley)
+
+    HADOOP-3552. Add forrest documentation for Hadoop commands.
+    (Sharad Agarwal via cdouglas)
+
+  BUG FIXES
+
+    HADOOP-2905. 'fsck -move' triggers NPE in NameNode. 
+    (Lohit Vjayarenu via rangadi)
+
+    Increment ClientProtocol.versionID missed by HADOOP-2585. (shv)
+
+    HADOOP-3254. Restructure internal namenode methods that process
+    heartbeats to use well-defined BlockCommand object(s) instead of 
+    using the base java Object. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3176.  Change lease record when an open-for-write file 
+    gets renamed. (dhruba)
+
+    HADOOP-3269.  Fix a case when namenode fails to restart
+    while processing a lease record.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3282. Port issues in TestCheckpoint resolved. (shv)
+
+    HADOOP-3268. file:// URLs issue in TestUrlStreamHandler under Windows.
+    (taton)
+
+    HADOOP-3127. Deleting files in trash should really remove them.
+    (Brice Arnould via omalley)
+
+    HADOOP-3300. Fix locking of explicit locks in NetworkTopology.
+    (tomwhite via omalley)
+
+    HADOOP-3270. Constant DatanodeCommands are stored in static final
+    immutable variables for better code clarity.  
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2793. Fix broken links for worst performing shuffle tasks in
+    the job history page. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3313. Avoid unnecessary calls to System.currentTimeMillis
+    in RPC::Invoker. (cdouglas)
+
+    HADOOP-3318. Recognize "Darwin" as an alias for "Mac OS X" to
+    support Soylatte. (Sam Pullara via omalley)
+
+    HADOOP-3301. Fix misleading error message when S3 URI hostname
+    contains an underscore. (tomwhite via omalley)
+
+    HADOOP-3338. Fix Eclipse plugin to compile after HADOOP-544 was
+    committed. Updated all references to use the new JobID representation.
+    (taton via nigel)
+
+    HADOOP-3337. Loading FSEditLog was broken by HADOOP-3283 since it 
+    changed Writable serialization of DatanodeInfo. This patch handles it.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3101. Prevent JobClient from throwing an exception when printing
+    usage. (Edward J. Yoon via cdouglas)
+
+    HADOOP-3119. Update javadoc for Text::getBytes to better describe its
+    behavior. (Tim Nelson via cdouglas)
+
+    HADOOP-2294. Fix documentation in libhdfs to refer to the correct free
+    function. (Craig Macdonald via cdouglas)
+
+    HADOOP-3335. Prevent the libhdfs build from deleting the wrong
+    files on make clean. (cutting via omalley)
+
+    HADOOP-2930. Make {start,stop}-balancer.sh work even if hadoop-daemon.sh
+    is not in the PATH. (Spiros Papadimitriou via hairong)
+
+    HADOOP-3085. Catch Exception in metrics util classes to ensure that
+    misconfigured metrics don't prevent others from updating. (cdouglas)
+
+    HADOOP-3299. CompositeInputFormat should configure the sub-input
+    formats. (cdouglas via omalley)
+
+    HADOOP-3309. Lower io.sort.mb and fs.inmemory.size.mb for MiniMRDFSSort
+    unit test so it passes on Windows. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3348. TestUrlStreamHandler should set URLStreamFactory after
+    DataNodes are initialized. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3371. Ignore InstanceAlreadyExistsException from
+    MBeanUtil::registerMBean. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3349. A file rename was incorrectly changing the name inside a
+    lease record. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor
+    to MergeQueue. (Devaraj Das)
+
+    HADOOP-3388. Fix for TestDatanodeBlockScanner to handle blocks with
+    generation stamps in them.  (dhruba)
+
+    HADOOP-3203. Fixes TaskTracker::localizeJob to pass correct file sizes
+    for the jarfile and the jobfile. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3391. Fix a findbugs warning introduced by HADOOP-3248 (rangadi)
+
+    HADOOP-3393. Fix datanode shutdown to call DataBlockScanner::shutdown and
+    close its log, even if the scanner thread is not running. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3399. A debug message was logged at info level. (rangadi)
+
+    HADOOP-3396. TestDatanodeBlockScanner occasionally fails. 
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3339. Some of the failures on the 3rd datanode in the DFS write pipeline 
+    are not detected properly. This could lead to hard failure of client's
+    write operation. (rangadi)
+
+    HADOOP-3409. Namenode should save the root inode into fsimage. (hairong)
+
+    HADOOP-3296. Fix task cache to work for more than two levels in the cache
+    hierarchy. This also adds a new counter to track cache hits at levels
+    greater than two. (Amar Kamat via cdouglas)
+
+    HADOOP-3375. Lease paths were sometimes not removed from 
+    LeaseManager.sortedLeasesByPath. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3424. Values returned by getPartition should be checked to
+    make sure they are in the range 0 to #reduces - 1 (cdouglas via
+    omalley)
+
+    HADOOP-3408. Change FSNamesystem to send its metrics as integers to
+    accommodate collectors that don't support long values. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3403. Fixes a problem in the JobTracker to do with handling of lost
+    tasktrackers. (Arun Murthy via ddas)
+
+    HADOOP-1318. Completed maps are not failed if the number of reducers is
+    zero. (Amareshwari Sriramadasu via ddas).
+
+    HADOOP-3351. Fixes the history viewer tool to not do huge StringBuffer
+    allocations. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3419. Fixes TestFsck to wait for updates to happen before
+    checking results to make the test more reliable. (Lohit Vijaya
+    Renu via omalley)
+
+    HADOOP-3259. Makes failure to read system properties due to a
+    security manager non-fatal. (Edward Yoon via omalley)
+
+    HADOOP-3451. Update libhdfs to use FileSystem::getFileBlockLocations
+    instead of removed getFileCacheHints. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3401. Update FileBench to set the new
+    "mapred.work.output.dir" property to work post-3041. (cdouglas via omalley)
+
+    HADOOP-2669. DFSClient locks pendingCreates appropriately. (dhruba)
+ 
+    HADOOP-3410. Fix KFS implemenation to return correct file
+    modification time.  (Sriram Rao via cutting)
+
+    HADOOP-3340. Fix DFS metrics for BlocksReplicated, HeartbeatsNum, and
+    BlockReportsAverageTime. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3435. Remove the assuption in the scripts that bash is at
+    /bin/bash and fix the test patch to require bash instead of sh.
+    (Brice Arnould via omalley)
+
+    HADOOP-3471. Fix spurious errors from TestIndexedSort and add additional
+    logging to let failures be reproducible. (cdouglas)
+
+    HADOOP-3443. Avoid copying map output across partitions when renaming a
+    single spill. (omalley via cdouglas)
+
+    HADOOP-3454. Fix Text::find to search only valid byte ranges. (Chad Whipkey
+    via cdouglas)
+
+    HADOOP-3417. Removes the static configuration variable,
+    commandLineConfig from JobClient. Moves the cli parsing from
+    JobShell to GenericOptionsParser.  Thus removes the class
+    org.apache.hadoop.mapred.JobShell.  (Amareshwari Sriramadasu via
+    ddas)
+
+    HADOOP-2132. Only RUNNING/PREP jobs can be killed. (Jothi Padmanabhan 
+    via ddas)
+
+    HADOOP-3476. Code cleanup in fuse-dfs.
+    (Peter Wyckoff via dhruba)
+
+    HADOOP-2427. Ensure that the cwd of completed tasks is cleaned-up
+    correctly on task-completion. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2565. Remove DFSPath cache of FileStatus. 
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3326. Cleanup the local-fs and in-memory merge in the ReduceTask by
+    spawning only one thread each for the on-disk and in-memory merge.
+    (Sharad Agarwal via acmurthy)
+
+    HADOOP-3493. Fix TestStreamingFailure to use FileUtil.fullyDelete to
+    ensure correct cleanup. (Lohit Vijayarenu via acmurthy) 
+
+    HADOOP-3455. Fix NPE in ipc.Client in case of connection failure and
+    improve its synchronization. (hairong)
+
+    HADOOP-3240. Fix a testcase to not create files in the current directory.
+    Instead the file is created in the test directory (Mahadev Konar via ddas)
+
+    HADOOP-3496.  Fix failure in TestHarFileSystem.testArchives due to change
+    in HADOOP-3095.  (tomwhite)
+
+    HADOOP-3135. Get the system directory from the JobTracker instead of from
+    the conf. (Subramaniam Krishnan via ddas)
+
+    HADOOP-3503. Fix a race condition when client and namenode start
+    simultaneous recovery of the same block.  (dhruba & Tsz Wo
+    (Nicholas), SZE)
+
+    HADOOP-3440. Fixes DistributedCache to not create symlinks for paths which
+    don't have fragments even when createSymLink is true. 
+    (Abhijit Bagri via ddas) 
+
+    HADOOP-3463. Hadoop-daemons script should cd to $HADOOP_HOME. (omalley)
+
+    HADOOP-3489. Fix NPE in SafeModeMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3509. Fix NPE in FSNamesystem.close. (Tsz Wo (Nicholas), SZE via 
+    shv)
+
+    HADOOP-3491. Name-node shutdown causes InterruptedException in 
+    ResolutionMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3511. Fixes namenode image to not set the root's quota to an
+    invalid value when the quota was not saved in the image. (hairong)
+
+    HADOOP-3516. Ensure the JobClient in HadoopArchives is initialized
+    with a configuration. (Subramaniam Krishnan via omalley)
+
+    HADOOP-3513. Improve NNThroughputBenchmark log messages. (shv)
+
+    HADOOP-3519.  Fix NPE in DFS FileSystem rename.  (hairong via tomwhite)
+    
+    HADOOP-3528. The FilesCreated and files_deleted metrics
+    do not match. (Lohit via Mahadev)
+
+    HADOOP-3418. When a directory is deleted, any leases that point to files
+    in the subdirectory are removed. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3542. Disables the creation of _logs directory for the archives
+    directory. (Mahadev Konar via ddas)
+
+    HADOOP-3544. Fixes a documentation issue for hadoop archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3517. Fixes a problem in the reducer due to which the last InMemory
+    merge may be missed. (Arun Murthy via ddas)
+
+    HADOOP-3548. Fixes build.xml to copy all *.jar files to the dist.
+    (Owen O'Malley via ddas)
+
+    HADOOP-3363. Fix unformatted storage detection in FSImage. (shv)
+
+    HADOOP-3560. Fixes a problem to do with split creation in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3545. Fixes an overflow problem in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3561. Prevent the trash from deleting its parent directories.
+    (cdouglas)
+
+    HADOOP-3575. Fix the clover ant target after package refactoring.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3539.  Fix the tool path in the bin/hadoop script under
+    cygwin. (Tsz Wo (Nicholas), Sze via omalley)
+
+    HADOOP-3520.  TestDFSUpgradeFromImage triggers a race condition in the
+    Upgrade Manager. Fixed. (dhruba)
+
+    HADOOP-3586. Provide deprecated, backwards-compatible semantics for the
+    combiner to be run once and only once on each record. (cdouglas)
+
+    HADOOP-3533. Add deprecated methods to provide API compatibility
+    between 0.18 and 0.17. Remove the deprecated methods in trunk. (omalley)
+
+    HADOOP-3580. Fixes a problem to do with specifying a har as an input to 
+    a job. (Mahadev Konar via ddas)
+
+    HADOOP-3333. Don't assign a task to a tasktracker that it failed to  
+    execute earlier (used to happen in the case of lost tasktrackers where
+    the tasktracker would reinitialize and bind to a different port). 
+    (Jothi Padmanabhan and Arun Murthy via ddas)
+
+    HADOOP-3534. Log IOExceptions that happen in closing the name
+    system when the NameNode shuts down. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3546. TaskTracker re-initialization gets stuck in cleaning up.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3576. Fix NullPointerException when renaming a directory
+    to its subdirectory. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3320. Fix NullPointerException in NetworkTopology.getDistance().
+    (hairong)
+
+    HADOOP-3569. KFS input stream read() now correctly reads 1 byte
+    instead of 4. (Sriram Rao via omalley)
+
+    HADOOP-3599. Fix JobConf::setCombineOnceOnly to modify the instance rather
+    than a parameter. (Owen O'Malley via cdouglas)
+
+    HADOOP-3590. Null pointer exception in JobTracker when the task tracker is 
+    not yet resolved. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3603. Fix MapOutputCollector to spill when io.sort.spill.percent is
+    1.0 and to detect spills when emitted records write no data. (cdouglas)
+
+    HADOOP-3615. Set DatanodeProtocol.versionID to the correct value.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3559. Fix the libhdfs test script and config to work with the
+    current semantics. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3480.  Need to update Eclipse template to reflect current trunk.
+    (Brice Arnould via tomwhite)
+  
+    HADOOP-3588. Fixed usability issues with archives. (mahadev)
+
+    HADOOP-3635. Uncaught exception in DataBlockScanner.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3639. Exception when closing DFSClient while multiple files are
+    open. (Benjamin Gufler via hairong)
+
+    HADOOP-3572. SetQuotas usage interface has some minor bugs. (hairong)
+
+    HADOOP-3649. Fix bug in removing blocks from the corrupted block map.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3604. Work around a JVM synchronization problem observed while
+    retrieving the address of direct buffers from compression code by obtaining
+    a lock during this call. (Arun C Murthy via cdouglas)
+
+    HADOOP-3683. Fix dfs metrics to count file listings rather than files
+    listed. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3597. Fix SortValidator to use filesystems other than the default as
+    input. Validation job still runs on default fs.
+    (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-3693. Fix archives, distcp and native library documentation to
+    conform to style guidelines. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-3653. Fix test-patch target to properly account for Eclipse
+    classpath jars. (Brice Arnould via nigel)
+
+    HADOOP-3692. Fix documentation for Cluster setup and Quick start guides. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3691. Fix streaming and tutorial docs. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3630. Fix NullPointerException in CompositeRecordReader from empty
+    sources (cdouglas)
+
+    HADOOP-3706. Fix a ClassLoader issue in the mapred.join Parser that
+    prevents it from loading user-specified InputFormats.
+    (Jingkei Ly via cdouglas)
+
+    HADOOP-3718. Fix KFSOutputStream::write(int) to output a byte instead of
+    an int, per the OutputStream contract. (Sriram Rao via cdouglas)
+
+    HADOOP-3647. Add debug logs to help track down a very occasional,
+    hard-to-reproduce, bug in shuffle/merge on the reducer. (acmurthy) 
+
+    HADOOP-3716. Prevent listStatus in KosmosFileSystem from returning
+    null for valid, empty directories. (Sriram Rao via cdouglas)
+
+    HADOOP-3752. Fix audit logging to record rename events. (cdouglas)
+
+    HADOOP-3737. Fix CompressedWritable to call Deflater::end to release
+    compressor memory. (Grant Glouser via cdouglas)
+
+    HADOOP-3670. Fixes JobTracker to clear out split bytes when no longer 
+    required. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3755. Update gridmix to work with HOD 0.4 (Runping Qi via cdouglas)
+  
+    HADOOP-3743. Fix -libjars, -files, -archives options to work even if 
+    user code does not implement tools. (Amareshwari Sriramadasu via mahadev)
+
+    HADOOP-3774. Fix typos in shell output. (Tsz Wo (Nicholas), SZE via
+    cdouglas)
+
+    HADOOP-3762. Fixed FileSystem cache to work with the default port. (cutting
+    via omalley)
+
+    HADOOP-3798. Fix tests compilation. (Mukund Madhugiri via omalley)
+
+    HADOOP-3794. Return modification time instead of zero for KosmosFileSystem.
+    (Sriram Rao via cdouglas)
+
+    HADOOP-3806. Remove debug statement to stdout from QuickSort. (cdouglas)
+
+    HADOOP-3776. Fix NPE at NameNode when datanode reports a block after it is
+    deleted at NameNode. (rangadi)
+
+    HADOOP-3537. Disallow adding a datanode to a network topology when its
+    network location is not resolved. (hairong)
+
+    HADOOP-3571. Fix bug in block removal used in lease recovery. (shv)
+
+    HADOOP-3645. MetricsTimeVaryingRate returns wrong value for
+    metric_avg_time. (Lohit Vijayarenu via hairong)
+
+    HADOOP-3521. Restored the cast to float, removed by HADOOP-544, when sending
+    Counters' values to Hadoop metrics. (acmurthy)
+
+    HADOOP-3820. Fixes two problems in the gridmix-env - a syntax error, and a 
+    wrong definition of USE_REAL_DATASET by default. (Arun Murthy via ddas)
+
+    HADOOP-3724. Fixes two problems related to storing and recovering lease
+    in the fsimage. (dhruba)
+    
+    HADOOP-3827.  Fixed compression of empty map-outputs. (acmurthy) 
+
+    HADOOP-3865. Remove reference to FSNamesystem from metrics preventing
+    garbage collection. (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-3884.  Fix so that Eclipse plugin builds against recent
+    Eclipse releases.  (cutting)
+
+    HADOOP-3837. Streaming jobs report progress status. (dhruba)
+
+    HADOOP-3897. Fix a NPE in secondary namenode. (Lohit Vijayarenu via 
+    cdouglas)
+
+    HADOOP-3901. Fix bin/hadoop to correctly set classpath under cygwin.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3947. Fix a problem in tasktracker reinitialization. 
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.17.3 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-4164. Chinese translation of the documentation. (Xuebing Yan via 
+    omalley)
+
+  BUG FIXES
+
+    HADOOP-4277. Checksum verification was mistakenly disabled for
+    LocalFileSystem. (Raghu Angadi)
+
+    HADOOP-4271. Checksum input stream can sometimes return invalid 
+    data to the user. (Ning Li via rangadi)
+
+    HADOOP-4318. DistCp should use absolute paths for cleanup.  (szetszwo)
+
+    HADOOP-4326. ChecksumFileSystem does not override create(...) correctly.
+    (szetszwo)
+
+Release 0.17.2 - 2008-08-11
+
+  BUG FIXES
+
+    HADOOP-3678. Avoid spurious exceptions logged at DataNode when clients
+    read from DFS. (rangadi)
+
+    HADOOP-3707. NameNode keeps a count of number of blocks scheduled
+    to be written to a datanode and uses it to avoid allocating more
+    blocks than a datanode can hold. (rangadi)
+
+    HADOOP-3760. Fix a bug with HDFS file close() mistakenly introduced
+    by HADOOP-3681. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3681. DFSClient can get into an infinite loop while closing
+    a file if there are some errors. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3002. Hold off block removal while in safe mode. (shv)
+
+    HADOOP-3685. Unbalanced replication target. (hairong)
+
+    HADOOP-3758. Shutdown datanode on version mismatch instead of retrying
+    continuously, preventing excessive logging at the namenode.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3633. Correct exception handling in DataXceiveServer, and throttle
+    the number of xceiver threads in a data-node. (shv)
+
+    HADOOP-3370. Ensure that the TaskTracker.runningJobs data-structure is
+    correctly cleaned-up on task completion. (Zheng Shao via acmurthy) 
+
+    HADOOP-3813. Fix task-output clean-up on HDFS to use the recursive 
+    FileSystem.delete rather than the FileUtil.fullyDelete. (Amareshwari
+    Sri Ramadasu via acmurthy)  
+
+    HADOOP-3859. Allow the maximum number of xceivers in the data node to
+    be configurable. (Johan Oskarsson via omalley)
+
+    HADOOP-3931. Fix corner case in the map-side sort that causes some values 
+    to be counted as too large, causing premature spills to disk. Some values
+    will also bypass the combiner incorrectly. (cdouglas via omalley)
+
+Release 0.17.1 - 2008-06-23
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3565. Fix the Java serialization, which is not enabled by
+    default, to clear the state of the serializer between objects.
+    (tomwhite via omalley)
+
+  IMPROVEMENTS
+
+    HADOOP-3522. Improve documentation on reduce pointing out that
+    input keys and values will be reused. (omalley)
+
+    HADOOP-3487. Balancer uses thread pools for managing its threads;
+    therefore provides better resource management. (hairong)
+
+  BUG FIXES
+
+    HADOOP-2159. Namenode stuck in safemode. The counter blockSafe should
+    not be decremented for invalid blocks. (hairong)
+
+    HADOOP-3472 MapFile.Reader getClosest() function returns incorrect results
+    when before is true (Todd Lipcon via Stack)
+
+    HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent
+    StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds
+    a multiple of log(n), change to HeapSort. (cdouglas)
+
+    HADOOP-3477. Fix build to not package contrib/*/bin twice in
+    distributions.  (Adam Heath via cutting)
+
+    HADOOP-3475. Fix MapTask to correctly size the accounting allocation of
+    io.sort.mb. (cdouglas)
+
+    HADOOP-3550. Fix the serialization data structures in MapTask where the
+    value lengths are incorrectly calculated. (cdouglas)
+
+    HADOOP-3526. Fix contrib/data_join framework by cloning values retained
+    in the reduce. (Spyros Blanas via cdouglas)
+
+    HADOOP-1979. Speed up fsck by adding a buffered stream. (Lohit
+    Vijaya Renu via omalley)
+
+Release 0.17.0 - 2008-05-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2786.  Move hbase out of hadoop core
+
+    HADOOP-2345.  New HDFS transactions to support appending 
+    to files.  Disk layout version changed from -11 to -12. (dhruba)
+
+    HADOOP-2192. Error messages from "dfs mv" command improved.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-1902. "dfs du" command without any arguments operates on the
+    current working directory.  (Mahadev Konar via dhruba)
+
+    HADOOP-2873.  Fixed bad disk format introduced by HADOOP-2345.
+    Disk layout version changed from -12 to -13. See changelist 630992
+    (dhruba)
+
+    HADOOP-1985.  This addresses rack-awareness for Map tasks and for 
+    HDFS in a uniform way. (ddas)
+
+    HADOOP-1986.  Add support for a general serialization mechanism for
+    Map Reduce. (tomwhite)
+
+    HADOOP-771. FileSystem.delete() takes an explicit parameter that
+    specifies whether a recursive delete is intended.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2470. Remove getContentLength(String), open(String, long, long)
+    and isDir(String) from ClientProtocol. ClientProtocol version changed
+    from 26 to 27. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2822. Remove deprecated code for classes InputFormatBase and 
+    PhasedFileSystem. (Amareshwari Sriramadasu via enis)
+
+    HADOOP-2116. Changes the layout of the task execution directory. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2828. The following deprecated methods in Configuration.java
+    have been removed
+        getObject(String name)
+        setObject(String name, Object value)
+        get(String name, Object defaultValue)
+        set(String name, Object value)
+        Iterator entries()
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2824. Removes one deprecated constructor from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2823. Removes deprecated methods getColumn(), getLine() from
+    org.apache.hadoop.record.compiler.generated.SimpleCharStream. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3060. Removes one unused constructor argument from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2854. Remove deprecated o.a.h.ipc.Server::getUserInfo().
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2563. Remove deprecated FileSystem::listPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2818.  Remove deprecated methods in Counters.
+    (Amareshwari Sriramadasu via tomwhite)
+
+    HADOOP-2831. Remove deprecated o.a.h.dfs.INode::getAbsoluteName()
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2839. Remove deprecated FileSystem::globPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2634. Deprecate ClientProtocol::exists.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2410.  Make EC2 cluster nodes more independent of each other.
+    Multiple concurrent EC2 clusters are now supported, and nodes may be
+    added to a cluster on the fly with new nodes starting in the same EC2
+    availability zone as the cluster.  Ganglia monitoring and large
+    instance sizes have also been added.  (Chris K Wensel via tomwhite)
+
+    HADOOP-2826. Deprecated FileSplit.getFile(), LineRecordReader.readLine().
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3239. getFileInfo() returns null for non-existing files instead
+    of throwing FileNotFoundException. (Lohit Vijayarenu via shv)
+
+    HADOOP-3266. Removed HOD changes from CHANGES.txt, as they are now inside 
+    src/contrib/hod  (Hemanth Yamijala via ddas)
+
+    HADOOP-3280. Separate the configuration of the virtual memory size
+    (mapred.child.ulimit) from the jvm heap size, so that 64 bit
+    streaming applications are supported even when running with 32 bit
+    jvms. (acmurthy via omalley)
+
+  NEW FEATURES
+
+    HADOOP-1398.  Add HBase in-memory block cache.  (tomwhite)
+
+    HADOOP-2178.  Job History on DFS. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2063. A new parameter to dfs -get command to fetch a file 
+    even if it is corrupted.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2219. A new command "dfs -count" that counts the number of
+    files and directories.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2906. Add an OutputFormat capable of using keys, values, and
+    config params to map records to different output files.
+    (Runping Qi via cdouglas)
+
+    HADOOP-2346. Utilities to support timeout while writing to sockets.
+    DFSClient and DataNode sockets have 10min write timeout. (rangadi)
+    
+    HADOOP-2951.  Add a contrib module that provides a utility to
+    build or update Lucene indexes using Map/Reduce.  (Ning Li via cutting)
+
+    HADOOP-1622.  Allow multiple jar files for map reduce.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2055. Allows users to set PathFilter on the FileInputFormat.
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-2551. More environment variables like HADOOP_NAMENODE_OPTS
+    for better control of HADOOP_OPTS for each component. (rangadi)
+
+    HADOOP-3001. Add job counters that measure the number of bytes
+    read and written to HDFS, S3, KFS, and local file systems. (omalley)
+
+    HADOOP-3048.  A new Interface and a default implementation to convert 
+    and restore serializations of objects to/from strings. (enis)
+
+  IMPROVEMENTS
+
+    HADOOP-2655. Copy on write for data and metadata files in the 
+    presence of snapshots. Needed for supporting appends to HDFS
+    files. (dhruba) 
+
+    HADOOP-1967.  When a Path specifies the same scheme as the default
+    FileSystem but no authority, the default FileSystem's authority is
+    used.  Also add warnings for old-format FileSystem names, accessor
+    methods for fs.default.name, and check for null authority in HDFS.
+    (cutting)
+
+    HADOOP-2895. Let the profiling string be configurable.
+    (Martin Traverso via cdouglas)
+
+    HADOOP-910. Enables Reduces to do merges for the on-disk map output files 
+    in parallel with their copying. (Amar Kamat via ddas)
+
+    HADOOP-730. Use rename rather than copy for local renames. (cdouglas)
+
+    HADOOP-2810. Updated the Hadoop Core logo. (nigel)
+
+    HADOOP-2057.  Streaming should optionally treat a non-zero exit status
+    of a child process as a failed task.  (Rick Cox via tomwhite)
+
+    HADOOP-2765. Enables specifying ulimits for streaming/pipes tasks (ddas)
+
+    HADOOP-2888. Make gridmix scripts more readily configurable and amenable
+    to automated execution. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2908.  A document that describes the DFS Shell command. 
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2981.  Update README.txt to reflect the upcoming use of
+    cryptography. (omalley)
+
+    HADOOP-2804.  Add support to publish CHANGES.txt as HTML when running
+    the Ant 'docs' target. (nigel)
+
+    HADOOP-2559. Change DFS block placement to allocate the first replica
+    locally, the second off-rack, and the third intra-rack from the
+    second. (lohit vijayarenu via cdouglas)
+
+    HADOOP-2939. Make the automated patch testing process an executable 
+    Ant target, test-patch. (nigel)
+
+    HADOOP-2239. Add HsftpFileSystem to permit transferring files over ssl.
+    (cdouglas)
+
+    HADOOP-2886.  Track individual RPC metrics.
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2373. Improvement in safe-mode reporting. (shv)
+
+    HADOOP-3091. Modify FsShell command -put to accept multiple sources.
+    (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3092. Show counter values from job -status command.
+    (Tom White via ddas)
+
+    HADOOP-1228.  Ant task to generate Eclipse project files.  (tomwhite)
+
+    HADOOP-3093. Adds Configuration.getStrings(name, default-value) and
+    the corresponding setStrings. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3106. Adds documentation in forrest for debugging.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3099. Add an option to distcp to preserve user, group, and
+    permission information. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2841. Unwrap AccessControlException and FileNotFoundException
+    from RemoteException for DFSClient. (shv)
+
+    HADOOP-3152.  Make index interval configurable when using
+    MapFileOutputFormat for map-reduce job.  (Rong-En Fan via cutting)
+
+    HADOOP-3143. Decrease number of slaves from 4 to 3 in TestMiniMRDFSSort,
+    as Hudson generates false negatives under the current load.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3174. Illustrative example for MultipleFileInputFormat. (Enis
+    Soztutar via acmurthy)  
+
+    HADOOP-2993. Clarify the usage of JAVA_HOME in the Quick Start guide.
+    (acmurthy via nigel)
+
+    HADOOP-3124. Make DataNode socket write timeout configurable. (rangadi)
+
+  OPTIMIZATIONS
+
+    HADOOP-2790.  Fixed inefficient method hasSpeculativeTask by removing
+    repetitive calls to get the current time and late checking to see if
+    we want speculation on at all. (omalley)
+
+    HADOOP-2758. Reduce buffer copies in DataNode when data is read from
+    HDFS, without negatively affecting read throughput. (rangadi)
+
+    HADOOP-2399. Input key and value to combiner and reducer is reused.
+    (Owen O'Malley via ddas). 
+
+    HADOOP-2423.  Code optimization in FSNamesystem.mkdirs.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2606. ReplicationMonitor selects data-nodes to replicate directly
+    from needed replication blocks instead of looking up for the blocks for 
+    each live data-node. (shv)
+
+    HADOOP-2148. Eliminate redundant data-node blockMap lookups. (shv)
+
+    HADOOP-2027. Return the number of bytes in each block in a file
+    via a single rpc to the namenode to speed up job planning. 
+    (Lohit Vijaya Renu via omalley)
+
+    HADOOP-2902.  Replace uses of "fs.default.name" with calls to the
+    accessor methods added in HADOOP-1967.  (cutting)
+
+    HADOOP-2119.  Optimize scheduling of jobs with large numbers of
+    tasks by replacing static arrays with lists of runnable tasks. 
+    (Amar Kamat via omalley)
+
+    HADOOP-2919.  Reduce the number of memory copies done during the
+    map output sorting. Also adds two config variables:
+    io.sort.spill.percent - the percentages of io.sort.mb that should
+                            cause a spill (default 80%)
+    io.sort.record.percent - the percent of io.sort.mb that should
+                             hold key/value indexes (default 5%)
+    (cdouglas via omalley)
+
+    HADOOP-3140. Doesn't add a task in the commit queue if the task hadn't
+    generated any output. (Amar Kamat via ddas)
+
+    HADOOP-3168. Reduce the amount of logging in streaming to an
+    exponentially increasing number of records (up to 10,000
+    records/log). (Zheng Shao via omalley)
+ 
+  BUG FIXES
+
+    HADOOP-2195. '-mkdir' behaviour is now closer to Linux shell in case of
+    errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2190. bring behaviour '-ls' and '-du' closer to Linux shell 
+    commands in case of errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2193. 'fs -rm' and 'fs -rmr' show error message when the target
+    file does not exist. (Mahadev Konar via rangadi)
+            
+    HADOOP-2738 Text is not subclassable because set(Text) and compareTo(Object)
+    access the other instance's private members directly. (jimk)
+
+    HADOOP-2779.  Remove the references to HBase in the build.xml. (omalley)
+
+    HADOOP-2194. dfs cat on a non-existent file throws FileNotFoundException.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2767. Fix for NetworkTopology erroneously skipping the last leaf 
+    node on a rack. (Hairong Kuang and Mark Butler via dhruba)
+
+    HADOOP-1593. FsShell works with paths in non-default FileSystem.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2191. du and dus command on non-existent directory gives 
+    appropriate error message.  (Mahadev Konar via dhruba)
+
+    HADOOP-2832. Remove tabs from code of DFSClient for better
+    indentation. (dhruba)
+
+    HADOOP-2844. distcp closes file handles for sequence files.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2727. Fix links in Web UI of the hadoop daemons and some docs
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2871. Fixes a problem to do with file: URI in the JobHistory init.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2800.  Deprecate SetFile.Writer constructor not the whole class.
+    (Johan Oskarsson via tomwhite)
+
+    HADOOP-2891.  DFSClient.close() closes all open files. (dhruba)
+
+    HADOOP-2845.  Fix dfsadmin disk utilization report on Solaris.
+    (Martin Traverso via tomwhite)
+
+    HADOOP-2912. MiniDFSCluster restart should wait for namenode to exit
+    safemode. This was causing TestFsck to fail.  (Mahadev Konar via dhruba)
+
+    HADOOP-2820. The following classes in streaming are removed : 
+    StreamLineRecordReader StreamOutputFormat StreamSequenceRecordReader.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2819. The following methods in JobConf are removed:
+    getInputKeyClass() setInputKeyClass getInputValueClass()
+    setInputValueClass(Class theClass) setSpeculativeExecution
+    getSpeculativeExecution() (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2817. Removes deprecated mapred.tasktracker.tasks.maximum and 
+    ClusterStatus.getMaxTasks(). (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2821. Removes deprecated ShellUtil and ToolBase classes from
+    the util package. (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2934. The namenode was encountering an NPE while loading
+    leases from the fsimage. Fixed. (dhruba)
+
+    HADOOP-2938. Some fs commands did not glob paths.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-2943. Compression of intermediate map output causes failures
+    in the merge. (cdouglas)
+
+    HADOOP-2870.  DataNode and NameNode closes all connections while
+    shutting down. (Hairong Kuang via dhruba)
+
+    HADOOP-2973. Fix TestLocalDFS for Windows platform.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2971. select multiple times if it returns early in 
+    SocketIOWithTimeout. (rangadi)
+
+    HADOOP-2955. Fix TestCrcCorruption test failures caused by HADOOP-2758
+    (rangadi)
+
+    HADOOP-2657. A flush call on the DFSOutputStream flushes the last
+    partial CRC chunk too.  (dhruba)
+
+    HADOOP-2974. IPC unit tests used "0.0.0.0" to connect to server, which
+    is not always supported. (rangadi)
+
+    HADOOP-2996. Fixes uses of StringBuffer in StreamUtils class.
+    (Dave Brosius via ddas)
+
+    HADOOP-2995. Fixes StreamBaseRecordReader's getProgress to return a 
+    floating point number. (Dave Brosius via ddas)
+
+    HADOOP-2972. Fix for an NPE in FSDataset.invalidate.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2994. Code cleanup for DFSClient: remove redundant 
+    conversions from string to string.  (Dave Brosius via dhruba)
+
+    HADOOP-3009. TestFileCreation sometimes fails because restarting
+    minidfscluster sometimes creates datanodes with ports that are
+    different from their original instance. (dhruba)
+
+    HADOOP-2992. Distributed Upgrade framework works correctly with
+    more than one upgrade object.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2679. Fix a typo in libhdfs.  (Jason via dhruba)
+
+    HADOOP-2976. When a lease expires, the Namenode ensures that 
+    blocks of the file are adequately replicated. (dhruba)
+
+    HADOOP-2901. Fixes the creation of info servers in the JobClient
+    and JobTracker. Removes the creation from JobClient and removes
+    additional info server from the JobTracker. Also adds the command
+    line utility to view the history files (HADOOP-2896), and fixes
+    bugs in JSPs to do with analysis - HADOOP-2742, HADOOP-2792.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2890. If different datanodes report the same block but
+    with different sizes to the namenode, the namenode picks the
+    replica(s) with the largest size as the only valid replica(s). (dhruba)
+
+    HADOOP-2825. Deprecated MapOutputLocation.getFile() is removed.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2806. Fixes a streaming document.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3008. SocketIOWithTimeout throws InterruptedIOException if the
+    thread is interrupted while it is waiting. (rangadi)
+    
+    HADOOP-3006. Fix wrong packet size reported by DataNode when a block
+    is being replicated. (rangadi)
+
+    HADOOP-3029. Datanode prints log message "firstbadlink" only if 
+    it detects a bad connection to another datanode in the pipeline. (dhruba)
+
+    HADOOP-3030. Release reserved space for file in InMemoryFileSystem if
+    checksum reservation fails. (Devaraj Das via cdouglas)
+
+    HADOOP-3036. Fix findbugs warnings in UpgradeUtilities. (Konstantin
+    Shvachko via cdouglas)
+
+    HADOOP-3025. ChecksumFileSystem supports the delete method with 
+    the recursive flag. (Mahadev Konar via dhruba)
+
+    HADOOP-3012. dfs -mv file to user home directory throws exception if 
+    the user home directory does not exist. (Mahadev Konar via dhruba)
+    
+    HADOOP-3066. Should not require superuser privilege to query if hdfs is in
+    safe mode (jimk)
+
+    HADOOP-3040. If the input line starts with the separator char, the key
+    is set as empty. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3080. Removes flush calls from JobHistory.
+    (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3086. Adds the testcase missed during commit of hadoop-3040.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3046. Fix the raw comparators for Text and BytesWritables
+    to use the provided length rather than recompute it. (omalley)
+
+    HADOOP-3094. Fix BytesWritable.toString to avoid extending the sign bit
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-3067. DFSInputStream's position read does not close the sockets.
+    (rangadi)
+
+    HADOOP-3073. close() on SocketInputStream or SocketOutputStream should
+    close the underlying channel. (rangadi)
+
+    HADOOP-3087. Fixes a problem to do with refreshing of loadHistory.jsp.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3065. Better logging message if the rack location of a datanode
+    cannot be determined.  (Devaraj Das via dhruba)
+
+    HADOOP-3064. Commas in a file path should not be treated as delimiters.
+    (Hairong Kuang via shv)
+
+    HADOOP-2997. Adds test for non-writable serialier. Also fixes a problem 
+    introduced by HADOOP-2399. (Tom White via ddas)
+
+    HADOOP-3114. Fix TestDFSShell on Windows. (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3118.  Fix Namenode NPE while loading fsimage after a cluster 
+    upgrade from older disk format. (dhruba)
+
+    HADOOP-3161. Fix FileUtil.HardLink.getLinkCount on Mac OS. (nigel
+    via omalley)
+
+    HADOOP-2927. Fix TestDU to accurately calculate the expected file size.
+    (shv via nigel)
+
+    HADOOP-3123. Fix the native library build scripts to work on Solaris.
+    (tomwhite via omalley)
+
+    HADOOP-3089.  Streaming should accept stderr from task before
+    first key arrives.  (Rick Cox via tomwhite)
+
+    HADOOP-3146. A DFSOutputStream.flush method is renamed as
+    DFSOutputStream.fsync.  (dhruba)
+
+    HADOOP-3165. -put/-copyFromLocal did not treat input file "-" as stdin.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3041. Deprecate JobConf.setOutputPath and JobConf.getOutputPath.
+    Deprecate OutputFormatBase. Add FileOutputFormat. Existing output formats
+    extending OutputFormatBase, now extend FileOutputFormat. Add the following
+    APIs in FileOutputFormat: setOutputPath, getOutputPath, getWorkOutputPath.
+    (Amareshwari Sriramadasu via nigel)
+
+    HADOOP-3083. The fsimage does not store leases. This would have to be
+    reworked in the next release to support appends. (dhruba)
+
+    HADOOP-3166. Fix an ArrayIndexOutOfBoundsException in the spill thread
+    and make exception handling more promiscuous to catch this condition.
+    (cdouglas)
+
+    HADOOP-3050. DataNode sends one and only one block report after
+    it registers with the namenode. (Hairong Kuang)
+
+    HADOOP-3044. NNBench sets the right configuration for the mapper.
+    (Hairong Kuang)
+
+    HADOOP-3178. Fix GridMix scripts for small and medium jobs
+    to handle input paths differently. (Mukund Madhugiri via nigel)
+
+    HADOOP-1911. Fix an infinite loop in DFSClient when all replicas of a
+    block are bad (cdouglas)
+
+    HADOOP-3157. Fix path handling in DistributedCache and TestMiniMRLocalFS.
+    (Doug Cutting via rangadi) 
+
+    HADOOP-3018. Fix the eclipse plug-in contrib wrt removed deprecated
+    methods (taton)
+
+    HADOOP-3183. Fix TestJobShell to use 'ls' instead of java.io.File::exists
+    since cygwin symlinks are unsupported.
+    (Mahadev konar via cdouglas)
+
+    HADOOP-3175. Fix FsShell.CommandFormat to handle "-" in arguments.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3220. Safemode message corrected. (shv)
+
+    HADOOP-3208. Fix WritableDeserializer to set the Configuration on
+    deserialized Writables. (Enis Soztutar via cdouglas)
+
+    HADOOP-3224. 'dfs -du /dir' does not return correct size.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3223. Fix typo in help message for -chmod. (rangadi)
+
+    HADOOP-1373. checkPath() should ignore case when it compares authority.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3204. Fixes a problem to do with ReduceTask's LocalFSMerger not
+    catching Throwable.  (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3229. Report progress when collecting records from the mapper and
+    the combiner. (Doug Cutting via cdouglas)
+
+    HADOOP-3225. Unwrapping methods of RemoteException should initialize
+    detailedMassage field. (Mahadev Konar, shv, cdouglas)
+
+    HADOOP-3247. Fix gridmix scripts to use the correct globbing syntax and
+    change maxentToSameCluster to run the correct number of jobs.
+    (Runping Qi via cdouglas)
+
+    HADOOP-3242. Fix the RecordReader of SequenceFileAsBinaryInputFormat to
+    correctly read from the start of the split and not the beginning of the
+    file. (cdouglas via acmurthy) 
+
+    HADOOP-3256. Encodes the job name used in the filename for history files.
+    (Arun Murthy via ddas)
+
+    HADOOP-3162. Ensure that comma-separated input paths are treated correctly
+    as multiple input paths. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-3263. Ensure that the job-history log file always follows the
+    pattern of hostname_timestamp_jobid_username_jobname even if username
+    and/or jobname are not specified. This helps to avoid wrong assumptions
+    made about the job-history log filename in jobhistory.jsp. (acmurthy) 
+
+    HADOOP-3251. Fixes getFilesystemName in JobTracker and LocalJobRunner to
+    use FileSystem.getUri instead of FileSystem.getName. (Arun Murthy via ddas)
+
+    HADOOP-3237. Fixes TestDFSShell.testErrOutPut on Windows platform.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3279. TaskTracker checks for SUCCEEDED task status in addition to 
+    COMMIT_PENDING status when it fails maps due to lost map.
+    (Devaraj Das)
+
+    HADOOP-3286. Prevent collisions in gridmix output dirs by increasing the
+    granularity of the timestamp. (Runping Qi via cdouglas)
+
+    HADOOP-3285. Fix input split locality when the splits align to
+    fs blocks. (omalley)
+
+    HADOOP-3372. Fix heap management in streaming tests. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3031. Fix javac warnings in test classes. (cdouglas)
+
+    HADOOP-3382. Fix memory leak when files are not cleanly closed (rangadi)
+
+    HADOOP-3322. Fix to push MetricsRecord for rpc metrics. (Eric Yang via
+    mukund)
+
+Release 0.16.4 - 2008-05-05
+
+  BUG FIXES
+
+    HADOOP-3138. DFS mkdirs() should not throw an exception if the directory
+    already exists. (rangadi via mukund)
+
+    HADOOP-3294. Fix distcp to check the destination length and retry the copy
+    if it doesn't match the src length. (Tsz Wo (Nicholas), SZE via mukund)
+
+    HADOOP-3186. Fix incorrect permission checking for mv and renameTo
+    in HDFS. (Tsz Wo (Nicholas), SZE via mukund)
+
+Release 0.16.3 - 2008-04-16
+
+  BUG FIXES
+
+    HADOOP-3010. Fix ConcurrentModificationException in ipc.Server.Responder.
+    (rangadi)
+
+    HADOOP-3154. Catch all Throwables from the SpillThread in MapTask, rather
+    than IOExceptions only. (ddas via cdouglas)
+
+    HADOOP-3159. Avoid file system cache being overwritten whenever
+    configuration is modified. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3139. Remove the consistency check for the FileSystem cache in
+    closeAll() that causes spurious warnings and a deadlock.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3195. Fix TestFileSystem to be deterministic.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3069. Primary name-node should not truncate image when transferring
+    it from the secondary. (shv)
+
+    HADOOP-3182. Change permissions of the job-submission directory to 777
+    from 733 to ensure sharing of HOD clusters works correctly. (Tsz Wo
+    (Nicholas), Sze and Amareshwari Sri Ramadasu via acmurthy) 
+
+Release 0.16.2 - 2008-04-02
+
+  BUG FIXES
+
+    HADOOP-3011. Prohibit distcp from overwriting directories on the
+    destination filesystem with files. (cdouglas)
+
+    HADOOP-3033. The BlockReceiver thread in the datanode writes data to 
+    the block file, changes file position (if needed) and flushes all by
+    itself. The PacketResponder thread does not flush block file. (dhruba)
+
+    HADOOP-2978. Fixes the JobHistory log format for counters.
+    (Runping Qi via ddas)
+
+    HADOOP-2985. Fixes LocalJobRunner to tolerate null job output path.
+    Also makes the _temporary a constant in MRConstants.java.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3003. FileSystem cache key is updated after a 
+    FileSystem object is created. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3042. Updates the Javadoc in JobConf.getOutputPath to reflect 
+    the actual temporary path. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3007. Tolerate mirror failures while DataNode is replicating
+    blocks as it used to before. (rangadi)
+
+    HADOOP-2944. Fixes a "Run on Hadoop" wizard NPE when creating a
+    Location from the wizard. (taton)
+
+    HADOOP-3049. Fixes a problem in MultiThreadedMapRunner to do with
+    catching RuntimeExceptions. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3039. Fixes a problem to do with exceptions in tasks not
+    killing jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3027. Fixes a problem to do with adding a shutdown hook in
+    FileSystem.  (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3056. Fix distcp when the target is an empty directory by
+    making sure the directory is created first. (cdouglas and acmurthy 
+    via omalley)
+
+    HADOOP-3070. Protect the trash emptier thread from null pointer
+    exceptions. (Koji Noguchi via omalley)
+
+    HADOOP-3084. Fix HftpFileSystem to work for zero-length files.
+    (cdouglas)
+
+    HADOOP-3107. Fix NPE when fsck invokes getListings. (dhruba)
+
+    HADOOP-3104. Limit MultithreadedMapRunner to have a fixed length queue
+    between the RecordReader and the map threads. (Alejandro Abdelnur via
+    omalley)
+
+    HADOOP-2833. Do not use "Dr. Who" as the default user in JobClient. 
+    A valid user name is required. (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3128. Throw RemoteException in setPermissions and setOwner of 
+    DistributedFileSystem.  (shv via nigel)
+
+Release 0.16.1 - 2008-03-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2869. Deprecate SequenceFile.setCompressionType in favor of
+    SequenceFile.createWriter, SequenceFileOutputFormat.setCompressionType,
+    and JobConf.setMapOutputCompressionType. (Arun C Murthy via cdouglas)
+    Configuration changes to hadoop-default.xml:
+      deprecated io.seqfile.compression.type
+
+  IMPROVEMENTS
+
+    HADOOP-2371. User guide for file permissions in HDFS.
+    (Robert Chansler via rangadi)
+
+    HADOOP-3098. Allow more characters in user and group names while
+    using -chown and -chgrp commands. (rangadi)
+    
+  BUG FIXES
+
+    HADOOP-2789. Race condition in IPC Server Responder that could close
+    connections early. (Raghu Angadi)
+    
+    HADOOP-2785. minor. Fix a typo in Datanode block verification 
+    (Raghu Angadi)
+    
+    HADOOP-2788. minor. Fix help message for chgrp shell command (Raghu Angadi).
+    
+    HADOOP-1188. fstime file is updated when a storage directory containing
+    namespace image becomes inaccessible. (shv)
+
+    HADOOP-2787. An application can set a configuration variable named
+    dfs.umask to set the umask that is used by DFS.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2780. The default socket buffer size for DataNodes is 128K.
+    (dhruba)
+
+    HADOOP-2716. Superuser privileges for the Balancer.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2754. Filter out .crc files from local file system listing.
+    (Hairong Kuang via shv)
+
+    HADOOP-2733. Fix compiler warnings in test code.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2725. Modify distcp to avoid leaving partially copied files at
+    the destination after encountering an error. (Tsz Wo (Nicholas), SZE
+    via cdouglas)
+
+    HADOOP-2391. Cleanup job output directory before declaring a job as
+    SUCCESSFUL. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2808. Minor fix to FileUtil::copy to mind the overwrite
+    formal. (cdouglas)
+
+    HADOOP-2683. Moving UGI out of the RPC Server.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2814. Fix for NPE in datanode in unit test TestDataTransferProtocol.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2811. Dump of counters in job history does not add comma between
+    groups. (runping via omalley)
+
+    HADOOP-2735. Enables setting TMPDIR for tasks. 
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2843. Fix protections on map-side join classes to enable derivation.
+    (cdouglas via omalley)
+
+    HADOOP-2840. Fix gridmix scripts to correctly invoke the java sort through
+    the proper jar. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2769.  TestNNThroughputBenchmark should not use a fixed port for
+    the namenode http port. (omalley)
+
+    HADOOP-2852. Update gridmix benchmark to avoid an artificially long tail.
+    (cdouglas)
+
+    HADOOP-2894. Fix a problem to do with tasktrackers failing to connect to
+    JobTracker upon reinitialization. (Owen O'Malley via ddas).
+
+    HADOOP-2903.  Fix exception generated by Metrics while using pushMetric().
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2904.  Fix to RPC metrics to log the correct host name. 
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2918.  Improve error logging so that dfs write failures with
+    "No lease on file" can be diagnosed. (dhruba)
+
+    HADOOP-2923.  Add SequenceFileAsBinaryInputFormat, which was
+    missed in the commit for HADOOP-2603. (cdouglas via omalley)
+
+    HADOOP-2931. IOException thrown by DFSOutputStream had wrong stack
+    trace in some cases. (Michael Bieniosek via rangadi)
+
+    HADOOP-2883. Write failures and data corruptions on HDFS files.
+    The write timeout is back to what it was on 0.15 release. Also, the
+    datanode flushes the block file buffered output stream before
+    sending a positive ack for the packet back to the client. (dhruba)
+
+    HADOOP-2756. NPE in DFSClient while closing DFSOutputStreams 
+    under load. (rangadi)
+
+    HADOOP-2958. Fixed FileBench which broke due to HADOOP-2391 which performs
+    a check for existence of the output directory and a trivial bug in
+    GenericMRLoadGenerator where min/max word lengths were identical since
+    they were looking at the same config variables (Chris Douglas via
+    acmurthy) 
+
+    HADOOP-2915. Fixed FileSystem.CACHE so that a username is included
+    in the cache key. (Tsz Wo (Nicholas), SZE via nigel)
+
+    HADOOP-2813. TestDU unit test uses its own directory to run its 
+    sequence of tests.  (Mahadev Konar via dhruba)
+
+Release 0.16.0 - 2008-02-07
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1245.  Use the mapred.tasktracker.tasks.maximum value
+    configured on each tasktracker when allocating tasks, instead of
+    the value configured on the jobtracker. InterTrackerProtocol
+    version changed from 5 to 6. (Michael Bieniosek via omalley)
+
+    HADOOP-1843. Removed code from Configuration and JobConf deprecated by 
+    HADOOP-785 and a minor fix to Configuration.toString. Specifically the 
+    important change is that mapred-default.xml is no longer supported and 
+    Configuration no longer supports the notion of default/final resources.
+    (acmurthy) 
+
+    HADOOP-1302.  Remove deprecated abacus code from the contrib directory.
+    This also fixes a configuration bug in AggregateWordCount, so that the
+    job now works.  (enis)
+
+    HADOOP-2288.  Enhance FileSystem API to support access control.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2184.  RPC Support for user permissions and authentication.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2185.  RPC Server uses any available port if the specified
+    port is zero. Otherwise it uses the specified port. Also combines
+    the configuration attributes for the servers' bind address and
+    port from "x.x.x.x" and "y" to "x.x.x.x:y". 
+    Deprecated configuration variables:
+      dfs.info.bindAddress
+      dfs.info.port
+      dfs.datanode.bindAddress
+      dfs.datanode.port
+      dfs.datanode.info.bindAddress
+      dfs.datanode.info.port
+      dfs.secondary.info.bindAddress
+      dfs.secondary.info.port
+      mapred.job.tracker.info.bindAddress
+      mapred.job.tracker.info.port
+      mapred.task.tracker.report.bindAddress
+      tasktracker.http.bindAddress
+      tasktracker.http.port
+    New configuration variables (post HADOOP-2404):
+      dfs.secondary.http.address
+      dfs.datanode.address
+      dfs.datanode.http.address
+      dfs.http.address
+      mapred.job.tracker.http.address
+      mapred.task.tracker.report.address
+      mapred.task.tracker.http.address
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2401.  Only the current leaseholder can abandon a block for
+    a HDFS file.  ClientProtocol version changed from 20 to 21.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2381.  Support permission information in FileStatus. Client
+    Protocol version changed from 21 to 22.  (Raghu Angadi via dhruba)
+
+    HADOOP-2110. Block report processing creates fewer transient objects.
+    Datanode Protocol version changed from 10 to 11.  
+    (Sanjay Radia via dhruba)
+    
+    HADOOP-2567.  Add FileSystem#getHomeDirectory(), which returns the
+    user's home directory in a FileSystem as a fully-qualified path.
+    FileSystem#getWorkingDirectory() is also changed to return a
+    fully-qualified path, which can break applications that attempt
+    to, e.g., pass LocalFileSystem#getWorkingDir().toString() directly
+    to java.io methods that accept file names. (cutting)
+
+    HADOOP-2514.  Change trash feature to maintain a per-user trash
+    directory, named ".Trash" in the user's home directory.  The
+    "fs.trash.root" parameter is no longer used.  Full source paths
+    are also no longer reproduced within the trash.
+
+    HADOOP-2012. Periodic data verification on Datanodes.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1707. The DFSClient does not use a local disk file to cache
+    writes to a HDFS file. Changed Data Transfer Version from 7 to 8.
+    (dhruba)
+
+    HADOOP-2652. Fix permission issues for HftpFileSystem. This is an 
+    incompatible change since distcp may not be able to copy files 
+    from cluster A (compiled with this patch) to cluster B (compiled 
+    with previous versions). (Tsz Wo (Nicholas), SZE via dhruba)
+
+  NEW FEATURES
+
+    HADOOP-1857.  Ability to run a script when a task fails to capture stack
+    traces. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2299.  Definition of a login interface.  A simple implementation for
+    Unix users and groups. (Hairong Kuang via dhruba)
+
+    HADOOP-1652.  A utility to balance data among datanodes in a HDFS cluster.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2085.  A library to support map-side joins of consistently 
+    partitioned and sorted data sets. (Chris Douglas via omalley)
+
+    HADOOP-2336. Shell commands to modify file permissions. (rangadi)
+
+    HADOOP-1298. Implement file permissions for HDFS.
+    (Tsz Wo (Nicholas) & taton via cutting)
+
+    HADOOP-2447. HDFS can be configured to limit the total number of 
+    objects (inodes and blocks) in the file system. (dhruba)
+
+    HADOOP-2487. Added an option to get statuses for all submitted/run jobs.
+    This information can be used to develop tools for analysing jobs.
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-1873. Implement user permissions for Map/Reduce framework.
+    (Hairong Kuang via shv)
+
+    HADOOP-2532.  Add to MapFile a getClosest method that returns the key
+    that comes just before if the key is not present.  (stack via tomwhite)
+   
+    HADOOP-1883. Add versioning to Record I/O. (Vivek Ratan via ddas)
+
+    HADOOP-2603.  Add SequenceFileAsBinaryInputFormat, which reads
+    sequence files as BytesWritable/BytesWritable regardless of the
+    key and value types used to write the file. (cdouglas via omalley)
+
+    HADOOP-2367. Add ability to profile a subset of map/reduce tasks and fetch
+    the result to the local filesystem of the submitting application. Also
+    includes a general IntegerRanges extension to Configuration for setting
+    positive, ranged parameters. (Owen O'Malley via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-2045.  Change committer list on website to a table, so that
+    folks can list their organization, timezone, etc.  (cutting)
+
+    HADOOP-2058.  Facilitate creating new datanodes dynamically in
+    MiniDFSCluster. (Hairong Kuang via dhruba)
+
+    HADOOP-1855.  fsck verifies block placement policies and reports
+    violations.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-1604.  A system administrator can finalize namenode upgrades
+    without running the cluster. (Konstantin Shvachko via dhruba)
+
+    HADOOP-1839.  Link-ify the Pending/Running/Complete/Killed grid in
+    jobdetails.jsp to help quickly narrow down and see categorized TIPs' 
+    details via jobtasks.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-1210.  Log counters in job history. (Owen O'Malley via ddas)
+
+    HADOOP-1912. Datanode has two new commands COPY and REPLACE. These are
+    needed for supporting data rebalance.  (Hairong Kuang via dhruba)
+
+    HADOOP-2086. This patch adds the ability to add dependencies to a job
+    (run via JobControl) after construction.  (Adrian Woodhead via ddas)
+
+    HADOOP-1185. Support changing the logging level of a server without 
+    restarting the server.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2134.  Remove developer-centric requirements from overview.html and
+    keep it end-user focussed, specifically sections related to subversion and
+    building Hadoop. (Jim Kellerman via acmurthy)
+
+    HADOOP-1989. Support simulated DataNodes. This helps creating large virtual
+    clusters for testing purposes.  (Sanjay Radia via dhruba)
+    
+    HADOOP-1274. Support different number of mappers and reducers per
+    TaskTracker to  allow administrators to better configure and utilize
+    heterogeneous clusters.
+    Configuration changes to hadoop-default.xml:
+      add mapred.tasktracker.map.tasks.maximum (default value of 2)
+      add mapred.tasktracker.reduce.tasks.maximum (default value of 2)
+      remove mapred.tasktracker.tasks.maximum (deprecated for 0.16.0)
+    (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2104. Adds a description to the ant targets. This makes the 
+    output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
+
+    HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
+    application versus trivial java application. (omalley via acmurthy)
+
+    HADOOP-2113. A new shell command "dfs -text" to view the contents of
+    a gziped or SequenceFile. (Chris Douglas via dhruba)
+
+    HADOOP-2207.  Add a "package" target for contrib modules that
+    permits each to determine what files are copied into release
+    builds.  (stack via cutting)
+
+    HADOOP-1984. Makes the backoff for failed fetches exponential. 
+    Earlier, it was a random backoff from an interval. 
+    (Amar Kamat via ddas)
+
+    HADOOP-1327.  Include website documentation for streaming. (Rob Weltman
+    via omalley)
+
+    HADOOP-2000.  Rewrite NNBench to measure namenode performance accurately.
+    It now uses the map-reduce framework for load generation.
+    (Mukund Madhugiri via dhruba)
+
+    HADOOP-2248. Speeds up the framework w.r.t Counters. Also has API
+    updates to the Counters part. (Owen O'Malley via ddas)
+
+    HADOOP-2326. The initial block report at Datanode startup time has
+    a random backoff period.  (Sanjay Radia via dhruba)
+
+    HADOOP-2432. HDFS includes the name of the file while throwing 
+    "File does not exist"  exception. (Jim Kellerman via dhruba)
+
+    HADOOP-2457. Added a 'forrest.home' property to the 'docs' target in
+    build.xml. (acmurthy) 
+
+    HADOOP-2149.  A new benchmark for three name-node operation: file create, 
+    open, and block report, to evaluate the name-node performance 
+    for optimizations or new features. (Konstantin Shvachko via shv)
+
+    HADOOP-2466. Change FileInputFormat.computeSplitSize to a protected
+    non-static method to allow sub-classes to provide alternate
+    implementations. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2425. Change TextOutputFormat to handle Text specifically for better
+    performance. Make NullWritable implement Comparable. Make TextOutputFormat
+    treat NullWritable like null. (omalley)
+
+    HADOOP-1719. Improves the utilization of shuffle copier threads.
+    (Amar Kamat via ddas)
+ 
+    HADOOP-2390. Added documentation for user-controls for intermediate
+    map-outputs & final job-outputs and native-hadoop libraries. (acmurthy) 
+ 
+    HADOOP-1660. Add the cwd of the map/reduce task to the java.library.path
+    of the child-jvm to support loading of native libraries distributed via
+    the DistributedCache. (acmurthy)
+ 
+    HADOOP-2285. Speeds up TextInputFormat. Also includes updates to the
+    Text API. (Owen O'Malley via cdouglas)
+
+    HADOOP-2233. Adds a generic load generator for modeling MR jobs. (cdouglas)
+
+    HADOOP-2369. Adds a set of scripts for simulating a mix of user map/reduce
+    workloads. (Runping Qi via cdouglas)
+
+    HADOOP-2547. Removes use of a 'magic number' in build.xml. 
+    (Hrishikesh via nigel)
+
+    HADOOP-2268. Fix org.apache.hadoop.mapred.jobcontrol classes to use the
+    List/Map interfaces rather than concrete ArrayList/HashMap classes
+    internally. (Adrian Woodhead via acmurthy)
+
+    HADOOP-2406. Add a benchmark for measuring read/write performance through
+    the InputFormat interface, particularly with compression. (cdouglas)
+
+    HADOOP-2131. Allow finer-grained control over speculative-execution. Now
+    users can set it for maps and reduces independently.
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.speculative.execution
+      add mapred.map.tasks.speculative.execution
+      add mapred.reduce.tasks.speculative.execution
+    (Amareshwari Sri Ramadasu via acmurthy) 
+      
+    HADOOP-1965. Interleave sort/spill in the map-task along with calls to the
+    Mapper.map method. This is done by splitting the 'io.sort.mb' buffer into
+    two and using one half for collecting map-outputs and the other half for
+    sort/spill. (Amar Kamat via acmurthy)
+    
+    HADOOP-2464. Unit tests for chmod, chown, and chgrp using DFS.
+    (Raghu Angadi)
+
+    HADOOP-1876. Persist statuses of completed jobs in HDFS so that the
+    JobClient can query and get information about decommissioned jobs and also
+    across JobTracker restarts.
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.tracker.persist.jobstatus.active (default value of false)
+      add mapred.job.tracker.persist.jobstatus.hours (default value of 0)
+      add mapred.job.tracker.persist.jobstatus.dir (default value of
+                                                    /jobtracker/jobsInfo)
+    (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2077. Added version and build information to STARTUP_MSG for all
+    hadoop daemons to aid error-reporting, debugging etc. (acmurthy) 
+
+    HADOOP-2398. Additional instrumentation for NameNode and RPC server.
+    Add support for accessing instrumentation statistics via JMX.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2449. A return of the non-MR version of NNBench.
+    (Sanjay Radia via shv)
+
+    HADOOP-1989. Remove 'datanodecluster' command from bin/hadoop.
+    (Sanjay Radia via shv)
+
+    HADOOP-1742. Improve JavaDoc documentation for ClientProtocol, DFSClient,
+    and FSNamesystem. (Konstantin Shvachko)
+
+    HADOOP-2298. Add Ant target for a binary-only distribution.
+    (Hrishikesh via nigel)
+
+    HADOOP-2509. Add Ant target for Rat report (Apache license header
+    reports).  (Hrishikesh via nigel)
+
+    HADOOP-2469.  WritableUtils.clone should take a Configuration
+    instead of a JobConf. (stack via omalley)
+
+    HADOOP-2659. Introduce superuser permissions for admin operations.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2596. Added a SequenceFile.createWriter api which allows the user
+    to specify the blocksize, replication factor and the buffersize to be
+    used for the underlying HDFS file. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2431. Test HDFS File Permissions. (Hairong Kuang via shv)
+
+    HADOOP-2232. Add an option to disable Nagle's algorithm in the IPC stack.
+    (Clint Morgan via cdouglas)
+
+    HADOOP-2342. Created a micro-benchmark for measuring 
+    local-file versus hdfs reads. (Owen O'Malley via nigel)
+
+    HADOOP-2529. First version of HDFS User Guide. (Raghu Angadi)
+
+    HADOOP-2690. Add jar-test target to build.xml, separating compilation
+    and packaging of the test classes. (Enis Soztutar via cdouglas)
+
+  OPTIMIZATIONS
+
+    HADOOP-1898.  Release the lock protecting the last time of the last stack
+    dump while the dump is happening. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-1900. Makes the heartbeat and task event queries interval 
+    dependent on the cluster size.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2208. Counter update frequency (from TaskTracker to JobTracker) is 
+    capped at 1 minute.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2284. Reduce the number of progress updates during the sorting in 
+    the map task. (Amar Kamat via ddas)
+
+  BUG FIXES
+
+    HADOOP-2583.  Fixes a bug in the Eclipse plug-in UI to edit locations.
+    Plug-in version is now synchronized with Hadoop version.
+
+    HADOOP-2100.  Remove faulty check for existence of $HADOOP_PID_DIR and let
+    'mkdir -p' check & create it. (Michael Bieniosek via acmurthy)
+
+    HADOOP-1642.  Ensure jobids generated by LocalJobRunner are unique to
+    avoid collisions and hence job-failures. (Doug Cutting via acmurthy)
+
+    HADOOP-2096.  Close open file-descriptors held by streams while localizing
+    job.xml in the JobTracker and while displaying it on the webui in 
+    jobconf.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-2098.  Log start & completion of empty jobs to JobHistory, which
+    also ensures that we close the file-descriptor of the job's history log 
+    opened during job-submission. (Amar Kamat via acmurthy)
+
+    HADOOP-2112.  Adding back changes to build.xml lost while reverting
+    HADOOP-1622 i.e. http://svn.apache.org/viewvc?view=rev&revision=588771.
+    (acmurthy)
+
+    HADOOP-2089.  Fixes the command line argument handling to handle multiple
+    -cacheArchive in Hadoop streaming.  (Lohit Vijayarenu via ddas)
+
+    HADOOP-2071.  Fix StreamXmlRecordReader to use a BufferedInputStream
+    wrapped over the DFSInputStream since mark/reset aren't supported by
+    DFSInputStream anymore. (Lohit Vijayarenu via acmurthy)
+
+    HADOOP-1348.  Allow XML comments inside configuration files. 
+    (Rajagopal Natarajan and Enis Soztutar via enis)
+
+    HADOOP-1952.  Improve handling of invalid, user-specified classes while
+    configuring streaming jobs such as combiner, input/output formats etc.
+    Now invalid options are caught, logged and jobs are failed early. (Lohit
+    Vijayarenu via acmurthy)
+
+    HADOOP-2151. FileSystem.globPaths validates the list of Paths that
+    it returns.  (Lohit Vijayarenu via dhruba)
+
+    HADOOP-2121. Cleanup DFSOutputStream when the stream encountered errors
+    when Datanodes became full.  (Raghu Angadi via dhruba)
+
+    HADOOP-1130. The FileSystem.closeAll() method closes all existing
+    DFSClients.  (Chris Douglas via dhruba)
+
+    HADOOP-2204. DFSTestUtil.waitReplication was not waiting for all replicas
+    to get created, thus causing unit test failure.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2078. A zero-size file may have no blocks associated with it.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2212. ChecksumFileSystem.getSumBufferSize might throw 
+    java.lang.ArithmeticException. The fix is to initialize bytesPerChecksum
+    to 0.  (Michael Bieniosek via ddas)
+
+    HADOOP-2216.  Fix jobtasks.jsp to ensure that it first collects the
+    taskids which satisfy the filtering criteria and then use that list to
+    print out only the required task-reports, previously it was oblivious to
+    the filtering and hence used the wrong index into the array of task-reports. 
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2272.  Fix findbugs target to reflect changes made to the location
+    of the streaming jar file by HADOOP-2207.  (Adrian Woodhead via nigel)
+
+    HADOOP-2244.  Fixes the MapWritable.readFields to clear the instance 
+    field variable every time readFields is called. (Michael Stack via ddas).
+
+    HADOOP-2245.  Fixes LocalJobRunner to include a jobId in the mapId. Also,  
+    adds a testcase for JobControl. (Adrian Woodhead via ddas).
+
+    HADOOP-2275. Fix erroneous detection of corrupted file when namenode 
+    fails to allocate any datanodes for newly allocated block.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-2256. Fix a bug in the namenode that could cause it to encounter
+    an infinite loop while deleting excess replicas that were created by 
+    block rebalancing.  (Hairong Kuang via dhruba)
+
+    HADOOP-2209. SecondaryNamenode process exits if it encounters exceptions 
+    that it cannot handle.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-2314. Prevent TestBlockReplacement from occasionally getting
+    into an infinite loop.  (Hairong Kuang via dhruba)
+
+    HADOOP-2300. This fixes a bug where mapred.tasktracker.tasks.maximum
+    would be ignored even if it was set in hadoop-site.xml.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2349.  Improve code layout in file system transaction logging code.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2368.  Fix unit tests on Windows.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2363.  This fix allows running multiple instances of the unit test
+    in parallel. The bug was introduced in HADOOP-2185 that changed
+    port-rolling behaviour.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2271.  Fix chmod task to be non-parallel. (Adrian Woodhead via
+    omalley)
+
+    HADOOP-2313.  Fail the build if building libhdfs fails. (nigel via omalley)
+
+    HADOOP-2359.  Remove warning for interrupted exception when closing down
+    minidfs. (dhruba via omalley)
+
+    HADOOP-1841. Prevent slow clients from consuming threads in the NameNode. 
+    (dhruba)
+    
+    HADOOP-2323. JobTracker.close() should not print stack traces for
+    normal exit.  (jimk via cutting)
+
+    HADOOP-2376. Prevents sort example from overriding the number of maps.
+    (Owen O'Malley via ddas)
+
+    HADOOP-2434. FSDatasetInterface read interface causes HDFS reads to occur 
+    in 1 byte chunks, causing performance degradation.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2459. Fix package target so that src/docs/build files are not
+    included in the release.  (nigel)
+
+    HADOOP-2215.  Fix documentation in cluster_setup.html &
+    mapred_tutorial.html reflect that mapred.tasktracker.tasks.maximum has
+    been superseded by mapred.tasktracker.{map|reduce}.tasks.maximum.
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-2352. Remove AC_CHECK_LIB for libz and liblzo to ensure that
+    libhadoop.so doesn't have a dependency on them. (acmurthy) 
+
+    HADOOP-2453. Fix the configuration for wordcount-simple example in Hadoop 
+    Pipes which currently produces an XML parsing error. (Amareshwari Sri
+    Ramadasu via acmurthy)
+
+    HADOOP-2476. Unit test failure while reading permission bits of local
+    file system (on Windows) fixed.  (Raghu Angadi via dhruba)
+
+    HADOOP-2247.  Fine-tune the strategies for killing mappers and reducers
+    due to failures while fetching map-outputs. Now the map-completion times
+    and number of currently running reduces are taken into account by the
+    JobTracker before  killing the mappers, while the progress made by the
+    reducer and the number of fetch-failures vis-a-vis total number of
+    fetch-attempts are taken into account before the reducer kills itself.
+    (Amar Kamat via acmurthy)
+    
+    HADOOP-2452. Fix eclipse plug-in build.xml to refer to the right
+    location where hadoop-*-core.jar is generated. (taton)
+
+    HADOOP-2492. Additional debugging in the rpc server to better 
+    diagnose ConcurrentModificationException. (dhruba)
+
+    HADOOP-2344. Enhance the utility for executing shell commands to read the
+    stdout/stderr streams while waiting for the command to finish (to free up
+    the buffers). Also, this patch throws away stderr of the DF utility.
+    @deprecated 
+      org.apache.hadoop.fs.ShellCommand for org.apache.hadoop.util.Shell
+      org.apache.hadoop.util.ShellUtil for 
+        org.apache.hadoop.util.Shell.ShellCommandExecutor
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2511. Fix a javadoc warning in org.apache.hadoop.util.Shell
+    introduced by HADOOP-2344. (acmurthy) 
+
+    HADOOP-2442. Fix TestLocalFileSystemPermission.testLocalFSsetOwner
+    to work on more platforms. (Raghu Angadi via nigel)
+
+    HADOOP-2488. Fix a regression in random read performance.
+    (Michael Stack via rangadi)
+
+    HADOOP-2523. Fix TestDFSShell.testFilePermissions on Windows.
+    (Raghu Angadi via nigel)
+
+    HADOOP-2535. Removed support for deprecated mapred.child.heap.size and
+    fixed some indentation issues in TaskRunner. (acmurthy)
+    Configuration changes to hadoop-default.xml:
+      remove mapred.child.heap.size
+
+    HADOOP-2512. Fix error stream handling in Shell. Use exit code to
+    detect shell command errors in RawLocalFileSystem. (Raghu Angadi)
+
+    HADOOP-2446. Fixes TestHDFSServerPorts and TestMRServerPorts so they
+    do not rely on statically configured ports and cleanup better. (nigel)
+
+    HADOOP-2537. Make build process compatible with Ant 1.7.0.
+    (Hrishikesh via nigel)
+
+    HADOOP-1281. Ensure running tasks of completed map TIPs (e.g. speculative
+    tasks) are killed as soon as the TIP completed. (acmurthy)
+
+    HADOOP-2571. Suppress a spurious warning in test code. (cdouglas)
+
+    HADOOP-2481. NNBench reports its progress periodically.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2601. Start name-node on a free port for TestNNThroughputBenchmark.
+    (Konstantin Shvachko)
+
+    HADOOP-2494.  Set +x on contrib/*/bin/* in packaged tar bundle.
+    (stack via tomwhite)
+
+    HADOOP-2605. Remove bogus leading slash in task-tracker report bindAddress.
+    (Konstantin Shvachko)
+    
+    HADOOP-2620. Trivial. 'bin/hadoop fs -help' did not list chmod, chown, and
+    chgrp. (Raghu Angadi)
+
+    HADOOP-2614. The DFS WebUI accesses are configured to be from the user
+    specified by dfs.web.ugi.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2543. Implement a "no-permission-checking" mode for smooth
+    upgrade from a pre-0.16 install of HDFS.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-290. A DataNode log message now prints the target of a replication
+    request correctly. (dhruba)
+
+    HADOOP-2538. Redirect to a warning, if plaintext parameter is true but 
+    the filter parameter is not given in TaskLogServlet.  
+    (Michael Bieniosek via enis)
+
+    HADOOP-2582. Prevent 'bin/hadoop fs -copyToLocal' from creating
+    zero-length files when the src does not exist.
+    (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-2189. Incrementing user counters should count as progress. (ddas)
+
+    HADOOP-2649. The NameNode periodically computes replication work for
+    the datanodes. The periodicity of this computation is now configurable.
+    (dhruba)
+
+    HADOOP-2549. Correct disk size computation so that data-nodes can switch
+    to other local drives if the current one is full. (Hairong Kuang via shv)
+
+    HADOOP-2633. Fsck should call name-node methods directly rather than 
+    through rpc. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2687. Modify a few log messages generated by the dfs client to be
+    logged only at INFO level. (stack via dhruba)
+
+    HADOOP-2402. Fix BlockCompressorStream to ensure it buffers data before
+    sending it down to the compressor so that each write call doesn't
+    compress. (Chris Douglas via acmurthy) 
+
+    HADOOP-2645. The Metrics initialization code does not throw
+    exceptions when servers are restarted by MiniDFSCluster.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2691. Fix a race condition that was causing the DFSClient
+    to erroneously remove a good datanode from a pipeline that actually
+    had another datanode that was bad. (dhruba)
+
+    HADOOP-1195. All code in FSNamesystem checks the return value
+    of getDataNode for null before using it. (dhruba)
+
+    HADOOP-2640. Fix a bug in MultiFileSplitInputFormat that was always
+    returning 1 split in some circumstances. (Enis Soztutar via nigel)
+
+    HADOOP-2626. Fix paths with special characters to work correctly
+    with the local filesystem.  (Thomas Friol via cutting)
+
+    HADOOP-2646. Fix SortValidator to work with fully-qualified 
+    working directories.  (Arun C Murthy via nigel)
+
+    HADOOP-2092. Added a ping mechanism to the pipes' task to periodically
+    check if the parent Java task is running, and exit if the parent isn't
+    alive and responding. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2714. TestDecommission failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2576. Namenode performance degradation over time triggered by
+    large heartbeat interval. (Raghu Angadi)
+
+    HADOOP-2713. TestDatanodeDeath failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2639. Fixes a problem to do with incorrect maintenance of values 
+    for runningMapTasks/runningReduceTasks. (Amar Kamat and Arun Murthy 
+    via ddas)
+
+    HADOOP-2723. Fixed the check for checking whether to do user task
+    profiling. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-2734. Link forrest docs to new http://hadoop.apache.org
+    (Doug Cutting via nigel)
+
+    HADOOP-2641. Added Apache license headers to 95 files. (nigel)
+
+    HADOOP-2732. Fix bug in path globbing.  (Hairong Kuang via nigel)
+
+    HADOOP-2404. Fix backwards compatibility with hadoop-0.15 configuration
+    files that was broken by HADOOP-2185. (omalley)
+
+    HADOOP-2755. Fix fsck performance degradation because of permissions 
+    issue.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2768. Fix performance regression caused by HADOOP-1707.
+    (dhruba borthakur via nigel)
+
+    HADOOP-3108. Fix NPE in setPermission and setOwner. (shv)
+
+Release 0.15.3 - 2008-01-18
+
+  BUG FIXES
+
+    HADOOP-2562. globPaths supports {ab,cd}.  (Hairong Kuang via dhruba)
+
+    HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba)
+
+    HADOOP-2570. "work" directory created unconditionally, and symlinks
+    created from the task cwds.
+
+    HADOOP-2574. Fixed mapred_tutorial.xml to correct minor errors with the
+    WordCount examples. (acmurthy) 
+
+Release 0.15.2 - 2008-01-02
+
+  BUG FIXES
+
+    HADOOP-2246.  Moved the changelog for HADOOP-1851 from the NEW FEATURES 
+    section to the INCOMPATIBLE CHANGES section. (acmurthy)
+
+    HADOOP-2238.  Fix TaskGraphServlet so that it sets the content type of 
+    the response appropriately.  (Paul Saab via enis)
+
+    HADOOP-2129.  Fix so that distcp works correctly when source is
+    HDFS but not the default filesystem.  HDFS paths returned by the
+    listStatus() method are now fully-qualified.  (cutting)
+
+    HADOOP-2378.  Fixes a problem where the last task completion event would
+    get created after the job completes. (Alejandro Abdelnur via ddas)
+
+    HADOOP-2228.  Checks whether a job with a certain jobId is already running
+    and then tries to create the JobInProgress object. 
+    (Johan Oskarsson via ddas)
+
+    HADOOP-2422.  dfs -cat multiple files fail with 'Unable to write to 
+    output stream'.  (Raghu Angadi via dhruba)
+
+    HADOOP-2460.  When the namenode encounters ioerrors on writing a
+    transaction log, it stops writing new transactions to that one.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2227.  Use the LocalDirAllocator uniformly for handling all of the
+    temporary storage required for a given task. It also implies that
+    mapred.local.dir.minspacestart is handled by checking if there is enough
+    free-space on any one of the available disks. (Amareshwari Sri Ramadasu
+    via acmurthy)
+
+    HADOOP-2437.  Fix the LocalDirAllocator to choose the seed for the
+    round-robin disk selections randomly. This helps in spreading data across
+    multiple partitions much better. (acmurthy)
+
+    HADOOP-2486. When the list of files from the InMemoryFileSystem is obtained
+    for merging, this patch will ensure that only those files whose checksums
+    have also got created (renamed) are returned. (ddas)
+
+    HADOOP-2456. Hardcode English locale to prevent NumberFormatException
+    from occurring when starting the NameNode with certain locales.
+    (Matthias Friedrich via nigel)
+
+  IMPROVEMENTS
+
+    HADOOP-2160.  Remove project-level, non-user documentation from
+    releases, since it's now maintained in a separate tree.  (cutting)
+
+    HADOOP-1327.  Add user documentation for streaming.  (cutting)
+
+    HADOOP-2382.  Add hadoop-default.html to subversion. (cutting)
+
+    HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead
+    of FileSystem.listPaths. This reduces the number of RPC calls on the
+    namenode, thereby improving scalability.  (Christian Kunz via dhruba)
+
+Release 0.15.1 - 2007-11-27
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-713.  Reduce CPU usage on namenode while listing directories.
+    FileSystem.listPaths does not return the size of the entire subtree.
+    Introduced a new API ClientProtocol.getContentLength that returns the
+    size of the subtree. (Dhruba Borthakur via dhruba)
+
+  IMPROVEMENTS
+
+    HADOOP-1917.  Addition of guides/tutorial for better overall
+    documentation for Hadoop. Specifically: 
+    * quickstart.html is targeted towards first-time users and helps them 
+      setup a single-node cluster and play with Hadoop. 
+    * cluster_setup.html helps admins to configure and setup non-trivial
+      hadoop clusters.
+    * mapred_tutorial.html is a comprehensive Map-Reduce tutorial. 
+    (acmurthy) 
+
+  BUG FIXES
+
+    HADOOP-2174.  Removed the unnecessary Reporter.setStatus call from
+    FSCopyFilesMapper.close which led to a NPE since the reporter isn't valid
+    in the close method. (Chris Douglas via acmurthy) 
+
+    HADOOP-2172.  Restore performance of random access to local files
+    by caching positions of local input streams, avoiding a system
+    call. (cutting)
+
+    HADOOP-2205.  Regenerate the Hadoop website since some of the changes made
+    by HADOOP-1917 weren't correctly copied over to the trunk/docs directory. 
+    Also fixed a couple of minor typos and broken links. (acmurthy)
+
+Release 0.15.0 - 2007-11-02
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1708.  Make files appear in namespace as soon as they are
+    created.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-999.  A HDFS Client immediately informs the NameNode of a new
+    file creation.  ClientProtocol version changed from 14 to 15.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-932.  File locking interfaces and implementations (that were
+    earlier deprecated) are removed.  Client Protocol version changed 
+    from 15 to 16.  (Raghu Angadi via dhruba)
+
+    HADOOP-1621.  FileStatus is now a concrete class and FileSystem.listPaths
+    is deprecated and replaced with listStatus. (Chris Douglas via omalley)
+
+    HADOOP-1656.  The blockSize of a file is stored persistently in the file
+    inode. (Dhruba Borthakur via dhruba)
+
+    HADOOP-1838.  The blocksize of files created with an earlier release is
+    set to the default block size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-785.  Add support for 'final' Configuration parameters,
+    removing support for 'mapred-default.xml', and changing
+    'hadoop-site.xml' to not override other files.  Now folks should
+    generally use 'hadoop-site.xml' for all configurations.  Values
+    with a 'final' tag may not be overridden by subsequently loaded
+    configuration files, e.g., by jobs.  (Arun C. Murthy via cutting)
+
+    HADOOP-1846. DatanodeReport in ClientProtocol can report live 
+    datanodes, dead datanodes or all datanodes. Client Protocol version
+    changed from 17 to 18.  (Hairong Kuang via dhruba)
+
+    HADOOP-1851.  Permit specification of map output compression type
+    and codec, independent of the final output's compression
+    parameters.  (Arun C Murthy via cutting)
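+
+    For illustration, a minimal sketch of enabling map output compression
+    independently of the job output, assuming the JobConf setters of this
+    era and the gzip codec:
+
+      import org.apache.hadoop.io.compress.GzipCodec;
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class MapOutputCompressionExample {
+        public static void main(String[] args) {
+          JobConf conf = new JobConf();
+          conf.setCompressMapOutput(true);                    // only the map outputs
+          conf.setMapOutputCompressorClass(GzipCodec.class);  // codec for map outputs
+        }
+      }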
+
+    HADOOP-1819.  Jobtracker cleanups, including binding ports before
+    clearing state directories, so that inadvertently starting a
+    second jobtracker doesn't trash one that's already running. Removed
+    method JobTracker.getTracker() because the static variable, which
+    stored the value caused initialization problems.
+    (omalley via cutting)
+
+  NEW FEATURES
+
+    HADOOP-89.  A client can access file data even before the creator
+    has closed the file. Introduce a new command "tail" from dfs shell.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1636.  Allow configuration of the number of jobs kept in
+    memory by the JobTracker.  (Michael Bieniosek via omalley)
+
+    HADOOP-1667.  Reorganize CHANGES.txt into sections to make it
+    easier to read.  Also remove numbering, to make merging easier.
+    (cutting)
+
+    HADOOP-1610.  Add metrics for failed tasks.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1767.  Add "bin/hadoop job -list" sub-command. (taton via cutting)
+
+    HADOOP-1351.  Add "bin/hadoop job [-fail-task|-kill-task]" sub-commands
+    to terminate a particular task-attempt. (Enis Soztutar via acmurthy)
+
+    HADOOP-1880. SleepJob : An example job that sleeps at each map and 
+    reduce task. (enis)
+
+    HADOOP-1809. Add a link in web site to #hadoop IRC channel. (enis)
+
+    HADOOP-1894. Add percentage graphs and mapred task completion graphs 
+    to Web User Interface. Users not using Firefox may install a plugin to 
+    their browsers to see svg graphics. (enis)
+
+    HADOOP-1914. Introduce a new NamenodeProtocol to allow secondary 
+    namenodes and rebalancing processes to communicate with a primary 
+    namenode.  (Hairong Kuang via dhruba)
+
+    HADOOP-1963.  Add a FileSystem implementation for the Kosmos
+    Filesystem (KFS).  (Sriram Rao via cutting)
+
+    HADOOP-1822.  Allow the specialization and configuration of socket
+    factories. Provide a StandardSocketFactory, and a SocksSocketFactory to
+    allow the use of SOCKS proxies. (taton).
+
+    HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2566. Add globStatus method to the FileSystem interface
+    and deprecate globPath and listPath. (Hairong Kuang via hairong)
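+
+    For illustration, a minimal sketch combining the new globStatus call with
+    the "{ }" wildcard syntax from HADOOP-1968; the paths are illustrative:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileStatus;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+
+      public class GlobExample {
+        public static void main(String[] args) throws Exception {
+          FileSystem fs = FileSystem.get(new Configuration());
+          // Matches files under either year directory in a single call.
+          FileStatus[] matches = fs.globStatus(new Path("/logs/{2007,2008}/*"));
+          for (FileStatus status : matches) {
+            System.out.println(status.getPath());
+          }
+        }
+      }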
+
+  OPTIMIZATIONS
+
+    HADOOP-1910.  Reduce the number of RPCs that DistributedFileSystem.create()
+    makes to the namenode. (Raghu Angadi via dhruba)
+
+    HADOOP-1565.  Reduce memory usage of NameNode by replacing 
+    TreeMap in HDFS Namespace with ArrayList.  
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1743.  Change DFS INode from a nested class to standalone
+    class, with specialized subclasses for directories and files, to
+    save memory on the namenode.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1759.  Change file name in INode from String to byte[],
+    saving memory on the namenode. (Konstantin Shvachko via cutting)
+
+    HADOOP-1766.  Save memory in namenode by having BlockInfo extend
+    Block, and replace many uses of Block with BlockInfo.
+    (Konstantin Shvachko via cutting)
+
+    HADOOP-1687.  Save memory in namenode by optimizing BlockMap
+    representation.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1774. Remove use of INode.parent in Block CRC upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1788.  Increase the buffer size on the Pipes command socket.
+    (Amareshwari Sri Ramadasu and Christian Kunz via omalley)
+
+  BUG FIXES
+
+    HADOOP-1946.  The Datanode code does not need to invoke du on
+    every heartbeat.  (Hairong Kuang via dhruba)
+
+    HADOOP-1935. Fix a NullPointerException in internalReleaseCreate.
+    (Dhruba Borthakur)
+
+    HADOOP-1933. The nodes listed in include and exclude files 
+    are always listed in the datanode report.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1953. The job tracker should wait between calls to try and delete
+    the system directory. (Owen O'Malley via devaraj)
+
+    HADOOP-1932. TestFileCreation fails with message saying filestatus.dat
+    is of incorrect size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1573. Support for 0 reducers in PIPES. 
+    (Owen O'Malley via devaraj)
+
+    HADOOP-1500. Fix typographical errors in the DFS WebUI.
+    (Nigel Daley via dhruba)
+
+    HADOOP-1076. Periodic checkpoint can continue even if an earlier
+    checkpoint encountered an error.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1887. The Namenode encounters an ArrayIndexOutOfBoundsException
+    while listing a directory that had a file that was
+    being actively written to.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1904. The Namenode encounters an exception because the
+    list of blocks per datanode-descriptor was corrupted.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1762. The Namenode fsimage does not contain a list of
+    Datanodes.  (Raghu Angadi via dhruba)
+
+    HADOOP-1890. Removed debugging prints introduced by HADOOP-1774.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1763. Too many lost task trackers on large clusters due to
+    insufficient number of RPC handler threads on the JobTracker.
+    (Devaraj Das)
+
+    HADOOP-1463.  HDFS report correct usage statistics for disk space
+    used by HDFS.  (Hairong Kuang via dhruba)
+
+    HADOOP-1692.  In DFS ant task, don't cache the Configuration.
+    (Chris Douglas via cutting)
+
+    HADOOP-1726.  Remove lib/jetty-ext/ant.jar. (omalley)
+
+    HADOOP-1772.  Fix hadoop-daemon.sh script to get correct hostname
+    under Cygwin.  (Tsz Wo (Nicholas), SZE via cutting)
+
+    HADOOP-1749.  Change TestDFSUpgrade to sort files, fixing sporadic
+    test failures.  (Enis Soztutar via cutting)
+
+    HADOOP-1748.  Fix tasktracker to be able to launch tasks when log
+    directory is relative.  (omalley via cutting)
+
+    HADOOP-1775.  Fix a NullPointerException and an
+    IllegalArgumentException in MapWritable.
+    (Jim Kellerman via cutting)
+
+    HADOOP-1795.  Fix so that jobs can generate output file names with
+    special characters.  (Frédéric Bertin via cutting)
+
+    HADOOP-1810.  Fix incorrect value type in MRBench (SmallJobs)
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1806.  Fix ant task to compile again, also fix default
+    builds to compile ant tasks.  (Chris Douglas via cutting)
+
+    HADOOP-1758.  Fix escape processing in librecordio to not be
+    quadratic.  (Vivek Ratan via cutting)
+
+    HADOOP-1817.  Fix MultiFileSplit to read and write the split
+    length, so that it is not always zero in map tasks.
+    (Thomas Friol via cutting)
+
+    HADOOP-1853.  Fix contrib/streaming to accept multiple -cacheFile
+    options.  (Prachi Gupta via cutting)
+
+    HADOOP-1818. Fix MultiFileInputFormat so that it does not return 
+    empty splits when numPaths < numSplits.  (Thomas Friol via enis)
+
+    HADOOP-1840. Fix race condition which leads to task's diagnostic
+    messages getting lost. (acmurthy) 
+
+    HADOOP-1885. Fix race condition in MiniDFSCluster shutdown.
+    (Chris Douglas via nigel)
+
+    HADOOP-1889.  Fix path in EC2 scripts for building your own AMI.
+    (tomwhite)
+
+    HADOOP-1892.  Fix a NullPointerException in the JobTracker when
+    trying to fetch a task's diagnostic messages from the JobClient.
+    (Amar Kamat via acmurthy)
+
+    HADOOP-1897.  Completely remove about.html page from the web site.
+    (enis)
+
+    HADOOP-1907.  Fix null pointer exception when getting task diagnostics
+    in JobClient. (Christian Kunz via omalley)
+
+    HADOOP-1882.  Remove spurious asterisks from decimal number displays.
+    (Raghu Angadi via cutting)
+
+    HADOOP-1783.  Make S3 FileSystem return Paths fully-qualified with
+    scheme and host.  (tomwhite)
+
+    HADOOP-1925.  Make pipes' autoconf script look for libsocket and libnsl, so
+    that it can compile under Solaris. (omalley)
+
+    HADOOP-1940.  TestDFSUpgradeFromImage must shut down its MiniDFSCluster.
+    (Chris Douglas via nigel)
+
+    HADOOP-1930.  Fix the blame for failed fetches on the right host. (Arun C.
+    Murthy via omalley)
+
+    HADOOP-1934.  Fix the platform name on Mac to use underscores rather than
+    spaces. (omalley)
+
+    HADOOP-1959.  Use "/" instead of File.separator in the StatusHttpServer.
+    (jimk via omalley)
+
+    HADOOP-1626.  Improve dfsadmin help messages.
+    (Lohit Vijayarenu via dhruba)
+
+    HADOOP-1695.  The SecondaryNamenode waits for the Primary NameNode to
+    start up.  (Dhruba Borthakur)
+
+    HADOOP-1983.  Have Pipes flush the command socket when progress is sent
+    to prevent timeouts during long computations. (omalley)
+
+    HADOOP-1875.  Non-existent directories or read-only directories are
+    filtered from dfs.client.buffer.dir.  (Hairong Kuang via dhruba)
+
+    HADOOP-1992.  Fix the performance degradation in the sort validator. 
+    (acmurthy via omalley)
+
+    HADOOP-1874.  Move task-outputs' promotion/discard to a separate thread
+    distinct from the main heartbeat-processing thread. The main upside being 
+    that we do not lock-up the JobTracker during HDFS operations, which
+    otherwise may lead to lost tasktrackers if the NameNode is unresponsive.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2026. Namenode prints out one log line for "Number of transactions"
+    at most once every minute. (Dhruba Borthakur)
+
+    HADOOP-2022.  Ensure that status information for successful tasks is correctly
+    recorded at the JobTracker, so that, for example, one may view correct
+    information via taskdetails.jsp. This bug was introduced by HADOOP-1874.
+    (Amar Kamat via acmurthy)
+                                
+    HADOOP-2031.  Correctly maintain the taskid which takes the TIP to 
+    completion, failing which the case of lost tasktrackers isn't handled
+    properly i.e. the map TIP is incorrectly left marked as 'complete' and it
+    is never rescheduled elsewhere, leading to hung reduces.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2018. The source datanode of a data transfer waits for
+    a response from the target datanode before closing the data stream.
+    (Hairong Kuang via dhruba)
+                                
+    HADOOP-2023. Disable TestLocalDirAllocator on Windows.
+    (Hairong Kuang via nigel)
+
+    HADOOP-2016.  Ignore status-updates from FAILED/KILLED tasks at the 
+    TaskTracker. This fixes a race-condition which caused the tasks to wrongly 
+    remain in the RUNNING state even after being killed by the JobTracker and
+    thus handicap the cleanup of the task's output sub-directory. (acmurthy)
+
+    HADOOP-1771. Fix a NullPointerException in streaming caused by an 
+    IOException in MROutputThread. (lohit vijayarenu via nigel)
+
+    HADOOP-2028. Fix distcp so that the log dir does not need to be 
+    specified and the destination does not need to exist.
+    (Chris Douglas via nigel)
+
+    HADOOP-2044. The namenode protects all lease manipulations using a 
+    sortedLease lock.  (Dhruba Borthakur)
+
+    HADOOP-2051. The TaskCommit thread should not die for exceptions other
+    than the InterruptedException. This behavior is there for the other long
+    running threads in the JobTracker. (Arun C Murthy via ddas)
+
+    HADOOP-1973. The FileSystem object would be accessed on the JobTracker
+    through a RPC in the InterTrackerProtocol. The check for the object being
+    null was missing and hence NPE would be thrown sometimes. This issue fixes
+    that problem.  (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2033.  The SequenceFile.Writer.sync method was a no-op, which caused
+    very uneven splits for applications like distcp that count on them.
+    (omalley)
+
+    HADOOP-2070.  Added a flush method to pipes' DownwardProtocol and call
+    that before waiting for the application to finish to ensure all buffered
+    data is flushed. (Owen O'Malley via acmurthy)
+
+    HADOOP-2080.  Fixed calculation of the checksum file size when the values
+    are large. (omalley)
+
+    HADOOP-2048.  Change error handling in distcp so that each map copies
+    as much as possible before reporting the error. Also report progress on
+    every copy. (Chris Douglas via omalley)
+
+    HADOOP-2073.  Change size of VERSION file after writing contents to it.
+    (Konstantin Shvachko via dhruba)
+ 
+    HADOOP-2102.  Fix the deprecated ToolBase to pass its Configuration object
+    to the superceding ToolRunner to ensure it picks up the appropriate
+    configuration resources. (Dennis Kubes and Enis Soztutar via acmurthy) 
+ 
+    HADOOP-2103.  Fix minor javadoc bugs introduced by HADOOP-2046. (Nigel
+    Daley via acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-1908. Restructure data node code so that block sending and 
+    receiving are separated from data transfer header handling.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-1921. Save the configuration of completed/failed jobs and make them
+    available via the web-ui. (Amar Kamat via devaraj)
+
+    HADOOP-1266. Remove dependency of package org.apache.hadoop.net on 
+    org.apache.hadoop.dfs.  (Hairong Kuang via dhruba)
+
+    HADOOP-1779. Replace INodeDirectory.getINode() by a getExistingPathINodes()
+    to allow the retrieval of all existing INodes along a given path in a
+    single lookup. This facilitates removal of the 'parent' field in the
+    inode. (Christophe Taton via dhruba)
+
+    HADOOP-1756. Add toString() to some Writable-s. (ab)
+
+    HADOOP-1727.  New classes: MapWritable and SortedMapWritable.
+    (Jim Kellerman via ab)
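+
+    For illustration, a minimal sketch of the new class; the keys and values
+    are illustrative only:
+
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.MapWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.io.Writable;
+
+      public class MapWritableExample {
+        public static void main(String[] args) {
+          MapWritable counts = new MapWritable();   // a serializable java.util.Map
+          counts.put(new Text("errors"), new IntWritable(3));
+          Writable value = counts.get(new Text("errors"));
+          System.out.println(value);                // prints 3
+        }
+      }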
+
+    HADOOP-1651.  Improve progress reporting.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1595.  dfsshell can wait for a file to achieve its intended
+    replication target. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1693.  Remove un-needed log fields in DFS replication classes,
+    since the log may be accessed statically. (Konstantin Shvachko via cutting)
+
+    HADOOP-1231.  Add generics to Mapper and Reducer interfaces.
+    (tomwhite via cutting)
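+
+    For illustration, a minimal sketch of a mapper written against the
+    generic interface; the class name and logic are illustrative only:
+
+      import java.io.IOException;
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.LongWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapred.MapReduceBase;
+      import org.apache.hadoop.mapred.Mapper;
+      import org.apache.hadoop.mapred.OutputCollector;
+      import org.apache.hadoop.mapred.Reporter;
+
+      // The type parameters declare the input/output key and value classes
+      // up front, so mismatches are caught at compile time.
+      public class LineLengthMapper extends MapReduceBase
+          implements Mapper<LongWritable, Text, Text, IntWritable> {
+        public void map(LongWritable offset, Text line,
+                        OutputCollector<Text, IntWritable> output,
+                        Reporter reporter) throws IOException {
+          output.collect(line, new IntWritable(line.getLength()));
+        }
+      }
+
+    A job would pick such a class up via JobConf.setMapperClass.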
+
+    HADOOP-1436.  Improved command-line APIs, so that all tools need
+    not subclass ToolBase, and generic parameter parser is public.
+    (Enis Soztutar via cutting)
+
+    HADOOP-1703.  DFS-internal code cleanups, removing several uses of
+    the obsolete UTF8.  (Christophe Taton via cutting)
+
+    HADOOP-1731.  Add Hadoop's version to contrib jar file names.
+    (cutting)
+
+    HADOOP-1689.  Make shell scripts more portable.  All shell scripts
+    now explicitly depend on bash, but do not require that bash be
+    installed in a particular location, as long as it is on $PATH.
+    (cutting)
+
+    HADOOP-1744.  Remove many uses of the deprecated UTF8 class from
+    the HDFS namenode.  (Christophe Taton via cutting)
+
+    HADOOP-1654.  Add IOUtils class, containing generic io-related
+    utility methods.   (Enis Soztutar via cutting)
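+
+    For illustration, a minimal sketch of copying a file's bytes to stdout
+    with the new utilities; the argument handling is illustrative only:
+
+      import java.io.InputStream;
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+      import org.apache.hadoop.io.IOUtils;
+
+      public class CatExample {
+        public static void main(String[] args) throws Exception {
+          Configuration conf = new Configuration();
+          FileSystem fs = FileSystem.get(conf);
+          InputStream in = fs.open(new Path(args[0]));
+          try {
+            // copyBytes handles the read/write loop and buffer sizing.
+            IOUtils.copyBytes(in, System.out, conf, false);
+          } finally {
+            IOUtils.closeStream(in);   // quiet close, swallowing any IOException
+          }
+        }
+      }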
+
+    HADOOP-1158.  Change JobTracker to record map-output transmission
+    errors and use them to trigger speculative re-execution of tasks.
+    (Arun C Murthy via cutting)
+
+    HADOOP-1601.  Change GenericWritable to use ReflectionUtils for
+    instance creation, avoiding classloader issues, and to implement
+    Configurable.  (Enis Soztutar via cutting)
+
+    HADOOP-1750.  Log standard output and standard error when forking
+    task processes.  (omalley via cutting)
+
+    HADOOP-1803.  Generalize build.xml to make files in all
+    src/contrib/*/bin directories executable.  (stack via cutting)
+
+    HADOOP-1739.  Let OS always choose the tasktracker's umbilical
+    port.  Also switch default address for umbilical connections to
+    loopback.  (cutting)
+
+    HADOOP-1812. Let OS choose ports for IPC and RPC unit tests. (cutting)
+
+    HADOOP-1825.  Create $HADOOP_PID_DIR when it does not exist.
+    (Michael Bieniosek via cutting)
+
+    HADOOP-1425.  Replace uses of ToolBase with the Tool interface.
+    (Enis Soztutar via cutting)
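+
+    For illustration, a minimal sketch of the Tool/ToolRunner pattern this
+    refers to; the class name and body are illustrative only:
+
+      import org.apache.hadoop.conf.Configured;
+      import org.apache.hadoop.util.Tool;
+      import org.apache.hadoop.util.ToolRunner;
+
+      public class ExampleTool extends Configured implements Tool {
+        public int run(String[] args) throws Exception {
+          // getConf() is already populated by the generic -D/-conf options
+          // that ToolRunner parses before calling run().
+          System.out.println("remaining args: " + args.length);
+          return 0;
+        }
+
+        public static void main(String[] args) throws Exception {
+          System.exit(ToolRunner.run(new ExampleTool(), args));
+        }
+      }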
+
+    HADOOP-1569.  Reimplement DistCP to use the standard FileSystem/URI
+    code in Hadoop so that you can copy from and to all of the supported file 
+    systems.(Chris Douglas via omalley)
+
+    HADOOP-1018.  Improve documentation w.r.t. handling of lost heartbeats between
+    TaskTrackers and JobTracker. (acmurthy)
+
+    HADOOP-1718.  Add ant targets for measuring code coverage with clover.
+    (simonwillnauer via nigel)
+
+    HADOOP-1592.  Log error messages to the client console when tasks
+    fail.  (Amar Kamat via cutting)
+
+    HADOOP-1879.  Remove some unneeded casts.  (Nilay Vaish via cutting)
+
+    HADOOP-1878.  Add space between priority links on job details
+    page. (Thomas Friol via cutting)
+
+    HADOOP-120.  In ArrayWritable, prevent creation with null value
+    class, and improve documentation.  (Cameron Pope via cutting)
+
+    HADOOP-1926. Add a random text writer example/benchmark so that we can
+    benchmark compression codecs on random data. (acmurthy via omalley)
+
+    HADOOP-1906. Warn the user if they have an obsolete mapred-default.xml
+    file in their configuration directory. (acmurthy via omalley)
+
+    HADOOP-1971.  Warn when job does not specify a jar. (enis via cutting)
+
+    HADOOP-1942. Increase the concurrency of transaction logging to 
+    edits log. Reduce the number of syncs by double-buffering the changes
+    to the transaction log. (Dhruba Borthakur)
+
+    HADOOP-2046.  Improve mapred javadoc.  (Arun C. Murthy via cutting)
+
+    HADOOP-2105.  Improve overview.html to clarify supported platforms, 
+    software pre-requisites for hadoop, how to install them on various 
+    platforms, and a better general description of hadoop and its utility.
+    (Jim Kellerman via acmurthy) 
+
+
+Release 0.14.4 - 2007-11-26
+
+  BUG FIXES
+
+    HADOOP-2140.  Add missing Apache Licensing text at the front of several
+    C and C++ files.
+
+    HADOOP-2169.  Fix the DT_SONAME field of libhdfs.so to set it to the
+    correct value of 'libhdfs.so', currently it is set to the absolute path of
+    libhdfs.so. (acmurthy) 
+
+    HADOOP-2001.  Make the job priority updates and job kills synchronized on
+    the JobTracker. Deadlock was seen in the JobTracker because of the lack of
+    this synchronization.  (Arun C Murthy via ddas)
+
+
+Release 0.14.3 - 2007-10-19
+
+  BUG FIXES
+
+    HADOOP-2053. Fixed a dangling reference to a memory buffer in the map 
+    output sorter. (acmurthy via omalley)
+
+    HADOOP-2036. Fix a NullPointerException in JvmMetrics class. (nigel)
+
+    HADOOP-2043. Release 0.14.2 was compiled with Java 1.6 rather than
+    Java 1.5.  (cutting)
+
+
+Release 0.14.2 - 2007-10-09
+
+  BUG FIXES
+
+    HADOOP-1948. Removed spurious error message during block crc upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1862.  Fix reduces getting stuck trying to find map outputs.
+    (Arun C. Murthy via ddas)
+ 
+    HADOOP-1977. Fixed handling of ToolBase cli options in JobClient.
+    (enis via omalley)
+
+    HADOOP-1972.  Fix LzoCompressor to ensure the user has actually asked
+    to finish compression. (arun via omalley)
+
+    HADOOP-1970.  Fix deadlock in progress reporting in the task. (Vivek
+    Ratan via omalley)
+
+    HADOOP-1978.  Name-node removes edits.new after a successful startup.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1955.  The Namenode tries not to pick the same source Datanode for
+    a replication request if an earlier replication request for the same
+    block from that source Datanode had failed.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1961.  The -get option to dfs-shell works when a single filename
+    is specified.  (Raghu Angadi via dhruba)
+
+    HADOOP-1997.  TestCheckpoint closes the edits file after writing to it,
+    otherwise the rename of this file on Windows fails.
+    (Konstantin Shvachko via dhruba)
+
+Release 0.14.1 - 2007-09-04
+
+  BUG FIXES
+
+    HADOOP-1740.  Fix null pointer exception in sorting map outputs. (Devaraj
+    Das via omalley)
+
+    HADOOP-1790.  Fix tasktracker to work correctly on multi-homed
+    boxes.  (Torsten Curdt via cutting)
+
+    HADOOP-1798.  Fix jobtracker to correctly account for failed
+    tasks.  (omalley via cutting)
+
+
+Release 0.14.0 - 2007-08-17
+
+  INCOMPATIBLE CHANGES
+
+  1. HADOOP-1134.
+     CONFIG/API - dfs.block.size must now be a multiple of
+       io.bytes.per.checksum, otherwise new files cannot be written.
+     LAYOUT - DFS layout version changed from -6 to -7, which will require an
+       upgrade from previous versions.
+     PROTOCOL - Datanode RPC protocol version changed from 7 to 8.
+
+  2. HADOOP-1283
+     API - deprecated file locking API.
+
+  3. HADOOP-894
+     PROTOCOL - changed ClientProtocol to fetch parts of block locations.
+
+  4. HADOOP-1336
+     CONFIG - Enable speculative execution by default.
+
+  5. HADOOP-1197
+     API - deprecated method for Configuration.getObject, because
+       Configurations should only contain strings.
+
+  6. HADOOP-1343
+     API - deprecate Configuration.set(String,Object) so that only strings are
+       put in Configurations.
+
+  7. HADOOP-1207
+     CLI - Fix FsShell 'rm' command to continue when a non-existent file is
+       encountered.
+
+  8. HADOOP-1473
+     CLI/API - Job, TIP, and Task id formats have changed and are now unique
+       across job tracker restarts.
+
+  9. HADOOP-1400
+     API - JobClient constructor now takes a JobConf object instead of a
+       Configuration object.
+
+  NEW FEATURES and BUG FIXES
+
+  1. HADOOP-1197.  In Configuration, deprecate getObject() and add
+     getRaw(), which skips variable expansion. (omalley via cutting)
+
+  2. HADOOP-1343.  In Configuration, deprecate set(String,Object) and
+     implement Iterable. (omalley via cutting)
+
+  3. HADOOP-1344.  Add RunningJob#getJobName(). (Michael Bieniosek via cutting)
+
+  4. HADOOP-1342.  In aggregators, permit one to limit the number of
+     unique values per key.  (Runping Qi via cutting)
+
+  5. HADOOP-1340.  Set the replication factor of the MD5 file in the filecache
+     to be the same as the replication factor of the original file.
+     (Dhruba Borthakur via tomwhite.)
+
+  6. HADOOP-1355.  Fix null pointer dereference in 
+     TaskLogAppender.append(LoggingEvent).  (Arun C Murthy via tomwhite.)
+
+  7. HADOOP-1357.  Fix CopyFiles to correctly avoid removing "/".
+     (Arun C Murthy via cutting)
+
+  8. HADOOP-234.  Add pipes facility, which permits writing MapReduce
+     programs in C++.
+
+  9. HADOOP-1359.  Fix a potential NullPointerException in HDFS.
+     (Hairong Kuang via cutting)
+
+ 10. HADOOP-1364.  Fix inconsistent synchronization in SequenceFile.
+     (omalley via cutting)
+
+ 11. HADOOP-1379.  Add findbugs target to build.xml.
+     (Nigel Daley via cutting)
+
+ 12. HADOOP-1364.  Fix various inconsistent synchronization issues.
+     (Devaraj Das via cutting)
+
+ 13. HADOOP-1393.  Remove a potential unexpected negative number from
+     uses of random number generator. (omalley via cutting)
+
+ 14. HADOOP-1387.  A number of "performance" code-cleanups suggested
+     by findbugs.  (Arun C Murthy via cutting)
+
+ 15. HADOOP-1401.  Add contrib/hbase javadoc to tree.  (stack via cutting)
+
+ 16. HADOOP-894.  Change HDFS so that the client only retrieves a limited
+     number of block locations per request from the namenode.
+     (Konstantin Shvachko via cutting)
+
+ 17. HADOOP-1406.  Plug a leak in MapReduce's use of metrics.
+     (David Bowen via cutting)
+
+ 18. HADOOP-1394.  Implement "performance" code-cleanups in HDFS
+     suggested by findbugs.  (Raghu Angadi via cutting)
+
+ 19. HADOOP-1413.  Add example program that uses Knuth's dancing links
+     algorithm to solve pentomino problems.  (omalley via cutting)
+
+ 20. HADOOP-1226.  Change HDFS so that paths it returns are always
+     fully qualified.  (Dhruba Borthakur via cutting)
+
+ 21. HADOOP-800.  Improvements to HDFS web-based file browser.
+     (Enis Soztutar via cutting)
+
+ 22. HADOOP-1408.  Fix a compiler warning by adding a class to replace
+     a generic.  (omalley via cutting)
+
+ 23. HADOOP-1376.  Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark.  (Devaraj Das via cutting)
+
+ 24. HADOOP-1429.  Stop logging exceptions during normal IPC server
+     shutdown.  (stack via cutting)
+
+ 25. HADOOP-1461.  Fix the synchronization of the task tracker to
+     avoid lockups in job cleanup.  (Arun C Murthy via omalley)
+
+ 26. HADOOP-1446.  Update the TaskTracker metrics while the task is
+     running. (Devaraj via omalley)
+
+ 27. HADOOP-1414.  Fix a number of issues identified by FindBugs as
+     "Bad Practice".  (Dhruba Borthakur via cutting)
+
+ 28. HADOOP-1392.  Fix "correctness" bugs identified by FindBugs in
+     fs and dfs packages.  (Raghu Angadi via cutting)
+
+ 29. HADOOP-1412.  Fix "dodgy" bugs identified by FindBugs in fs and
+     io packages.  (Hairong Kuang via cutting)
+
+ 30. HADOOP-1261.  Remove redundant events from HDFS namenode's edit
+     log when a datanode restarts.  (Raghu Angadi via cutting)
+
+ 31. HADOOP-1336.  Re-enable speculative execution by
+     default. (omalley via cutting)
+
+ 32. HADOOP-1311.  Fix a bug in BytesWritable#set() where start offset
+     was ignored.  (Dhruba Borthakur via cutting)
+
+ 33. HADOOP-1450.  Move checksumming closer to user code, so that
+     checksums are created before data is stored in large buffers and
+     verified after data is read from large buffers, to better catch
+     memory errors.  (cutting)
+
+ 34. HADOOP-1447.  Add support in contrib/data_join for text inputs.
+     (Senthil Subramanian via cutting)
+
+ 35. HADOOP-1456.  Fix TestDecommission assertion failure by setting
+     the namenode to ignore the load on datanodes while allocating
+     replicas.  (Dhruba Borthakur via tomwhite)
+
+ 36. HADOOP-1396.  Fix FileNotFoundException on DFS block.
+     (Dhruba Borthakur via tomwhite)
+
+ 37. HADOOP-1467.  Remove redundant counters from WordCount example.
+     (Owen O'Malley via tomwhite)
+
+ 38. HADOOP-1139.  Log HDFS block transitions at INFO level, to better
+     enable diagnosis of problems.  (Dhruba Borthakur via cutting)
+
+ 39. HADOOP-1269.  Finer grained locking in HDFS namenode.
+     (Dhruba Borthakur via cutting)
+
+ 40. HADOOP-1438.  Improve HDFS documentation, correcting typos and
+     making images appear in PDF.  Also update copyright date for all
+     docs.  (Luke Nezda via cutting)
+
+ 41. HADOOP-1457.  Add counters for monitoring task assignments.
+     (Arun C Murthy via tomwhite)
+
+ 42. HADOOP-1472.  Fix so that timed-out tasks are counted as failures
+     rather than as killed.  (Arun C Murthy via cutting)
+
+ 43. HADOOP-1234.  Fix a race condition in file cache that caused
+     tasktracker to not be able to find cached files.
+     (Arun C Murthy via cutting)
+
+ 44. HADOOP-1482.  Fix secondary namenode to roll info port.
+     (Dhruba Borthakur via cutting)
+
+ 45. HADOOP-1300.  Improve removal of excess block replicas to be
+     rack-aware.  Attempts are now made to keep replicas on more
+     racks.  (Hairong Kuang via cutting)
+
+ 46. HADOOP-1417.  Disable a few FindBugs checks that generate a lot
+     of spurious warnings.  (Nigel Daley via cutting)
+
+ 47. HADOOP-1320.  Rewrite RandomWriter example to bypass reduce.
+     (Arun C Murthy via cutting)
+
+ 48. HADOOP-1449.  Add some examples to contrib/data_join.
+     (Senthil Subramanian via cutting)
+
+ 49. HADOOP-1459.  Fix so that, in HDFS, getFileCacheHints() returns
+     hostnames instead of IP addresses.  (Dhruba Borthakur via cutting)
+
+ 50. HADOOP-1493.  Permit specification of "java.library.path" system
+     property in "mapred.child.java.opts" configuration property.
+     (Enis Soztutar via cutting)
+
+ 51. HADOOP-1372.  Use LocalDirAllocator for HDFS temporary block
+     files, so that disk space, writability, etc. is considered.
+     (Dhruba Borthakur via cutting)
+
+ 52. HADOOP-1193.  Pool allocation of compression codecs.  This
+     eliminates a memory leak that could cause OutOfMemoryException,
+     and also substantially improves performance.
+     (Arun C Murthy via cutting)
+
+ 53. HADOOP-1492.  Fix a NullPointerException handling version
+     mismatch during datanode registration.
+     (Konstantin Shvachko via cutting)
+
+ 54. HADOOP-1442.  Fix handling of zero-length input splits.
+     (Senthil Subramanian via cutting)
+
+ 55. HADOOP-1444.  Fix HDFS block id generation to check pending
+     blocks for duplicates. (Dhruba Borthakur via cutting)
+
+ 56. HADOOP-1207.  Fix FsShell's 'rm' command to not stop when one of
+     the named files does not exist.  (Tsz Wo Sze via cutting)
+
+ 57. HADOOP-1475.  Clear tasktracker's file cache before it
+     re-initializes, to avoid confusion.  (omalley via cutting)
+
+ 58. HADOOP-1505.  Remove spurious stacktrace in ZlibFactory
+     introduced in HADOOP-1093.  (Michael Stack via tomwhite)
+
+ 59. HADOOP-1484.  Permit one to kill jobs from the web ui.  Note that
+     this is disabled by default.  One must set
+     "webinterface.private.actions" to enable this.
+     (Enis Soztutar via cutting)
+
+ 60. HADOOP-1003.  Remove flushing of namenode edit log from primary
+     namenode lock, increasing namenode throughput.
+     (Dhruba Borthakur via cutting)
+
+ 61. HADOOP-1023.  Add links to searchable mail archives.
+     (tomwhite via cutting)
+
+ 62. HADOOP-1504.  Fix terminate-hadoop-cluster script in contrib/ec2
+     to only terminate Hadoop instances, and not other instances
+     started by the same user.  (tomwhite via cutting)
+
+ 63. HADOOP-1462.  Improve task progress reporting.  Progress reports
+     are no longer blocking since i/o is performed in a separate
+     thread.  Reporting during sorting and more is also more
+     consistent.  (Vivek Ratan via cutting)
+
+ 64. [ intentionally blank ]
+
+ 65. HADOOP-1453.  Remove some unneeded calls to FileSystem#exists()
+     when opening files, reducing the namenode load somewhat.
+     (Raghu Angadi via cutting)
+
+ 66. HADOOP-1489.  Fix text input truncation bug due to mark/reset.
+     Add a unit test. (Bwolen Yang via cutting)
+
+ 67. HADOOP-1455.  Permit specification of arbitrary job options on
+     pipes command line.  (Devaraj Das via cutting)
+
+ 68. HADOOP-1501.  Better randomize sending of block reports to
+     namenode, to reduce load spikes.  (Dhruba Borthakur via cutting)
+
+ 69. HADOOP-1147.  Remove @author tags from Java source files.
+
+ 70. HADOOP-1283.  Convert most uses of UTF8 in the namenode to be
+     String.  (Konstantin Shvachko via cutting)
+
+ 71. HADOOP-1511.  Speedup hbase unit tests.  (stack via cutting)
+
+ 72. HADOOP-1517.  Remove some synchronization in namenode to permit
+     finer grained locking previously added.  (Konstantin Shvachko via cutting)
+
+ 73. HADOOP-1512.  Fix failing TestTextInputFormat on Windows.
+     (Senthil Subramanian via nigel)
+
+ 74. HADOOP-1518.  Add a session id to job metrics, for use by HOD.
+     (David Bowen via cutting)
+
+ 75. HADOOP-1292.  Change 'bin/hadoop fs -get' to first copy files to
+     a temporary name, then rename them to their final name, so that
+     failures don't leave partial files.  (Tsz Wo Sze via cutting)
+
+ 76. HADOOP-1377.  Add support for modification time to FileSystem and
+     implement in HDFS and local implementations.  Also, alter access
+     to file properties to be through a new FileStatus interface.
+     (Dhruba Borthakur via cutting)
+
+ 77. HADOOP-1515.  Add MultiFileInputFormat, which can pack multiple,
+     typically small, input files into each split.  (Enis Soztutar via cutting)
+
+ 78. HADOOP-1514.  Make reducers report progress while waiting for map
+     outputs, so they're not killed.  (Vivek Ratan via cutting)
+
+ 79. HADOOP-1508.  Add an Ant task for FsShell operations.  Also add
+     new FsShell commands "touchz", "test" and "stat".
+     (Chris Douglas via cutting)
+
+ 80. HADOOP-1028.  Add log messages for server startup and shutdown.
+     (Tsz Wo Sze via cutting)
+
+ 81. HADOOP-1485.  Add metrics for monitoring shuffle.
+     (Devaraj Das via cutting)
+
+ 82. HADOOP-1536.  Remove file locks from libhdfs tests.
+     (Dhruba Borthakur via nigel)
+
+ 83. HADOOP-1520.  Add appropriate synchronization to FSEditsLog.
+     (Dhruba Borthakur via nigel)
+
+ 84. HADOOP-1513.  Fix a race condition in directory creation. 
+     (Devaraj via omalley)
+
+ 85. HADOOP-1546.  Remove spurious column from HDFS web UI.
+     (Dhruba Borthakur via cutting)
+
+ 86. HADOOP-1556.  Make LocalJobRunner delete working files at end of
+     job run.  (Devaraj Das via tomwhite)
+
+ 87. HADOOP-1571.  Add contrib lib directories to root build.xml
+     javadoc classpath.  (Michael Stack via tomwhite)
+
+ 88. HADOOP-1554.  Log killed tasks to the job history and display them on the
+     web/ui. (Devaraj Das via omalley)
+
+ 89. HADOOP-1533.  Add persistent error logging for distcp. The logs are stored
+     into a specified hdfs directory. (Senthil Subramanian via omalley)
+
+ 90. HADOOP-1286.  Add support to HDFS for distributed upgrades, which
+     permits coordinated upgrade of datanode data.
+     (Konstantin Shvachko via cutting)
+
+ 91. HADOOP-1580.  Improve contrib/streaming so that subprocess exit
+     status is displayed for errors.  (John Heidemann via cutting)
+
+ 92. HADOOP-1448.  In HDFS, randomize lists of non-local block
+     locations returned to client, so that load is better balanced.
+     (Hairong Kuang via cutting)
+
+ 93. HADOOP-1578.  Fix datanode to send its storage id to namenode
+     during registration.  (Konstantin Shvachko via cutting)
+
+ 94. HADOOP-1584.  Fix a bug in GenericWritable which limited it to
+     128 types instead of 256.  (Espen Amble Kolstad via cutting)
+
+ 95. HADOOP-1473.  Make job ids unique across jobtracker restarts.
+     (omalley via cutting)
+
+ 96. HADOOP-1582.  Fix hdfslib to return 0 instead of -1 at
+     end-of-file, per C conventions.  (Christian Kunz via cutting)
+
+ 97. HADOOP-911.  Fix a multithreading bug in libhdfs.
+     (Christian Kunz)
+
+ 98. HADOOP-1486.  Fix so that fatal exceptions in namenode cause it
+     to exit.  (Dhruba Borthakur via cutting)
+
+ 99. HADOOP-1470.  Factor checksum generation and validation out of
+     ChecksumFileSystem so that it can be reused by FileSystem's with
+     built-in checksumming.  (Hairong Kuang via cutting)
+
+100. HADOOP-1590.  Use relative urls in jobtracker jsp pages, so that
+     webapp can be used in non-root contexts.  (Thomas Friol via cutting)
+
+101. HADOOP-1596.  Fix the parsing of taskids by streaming and improve the
+     error reporting. (omalley)
+
+102. HADOOP-1535.  Fix the user-controlled grouping to the reduce function.
+     (Vivek Ratan via omalley)
+
+103. HADOOP-1585.  Modify GenericWritable to declare the classes as subtypes
+     of Writable (Espen Amble Kolstad via omalley)
+
+104. HADOOP-1576.  Fix errors in count of completed tasks when
+     speculative execution is enabled.  (Arun C Murthy via cutting)
+
+105. HADOOP-1598.  Fix license headers: adding missing; updating old.
+     (Enis Soztutar via cutting)
+
+106. HADOOP-1547.  Provide examples for aggregate library.
+     (Runping Qi via tomwhite)
+
+107. HADOOP-1570.  Permit jobs to enable and disable the use of
+     hadoop's native library.  (Arun C Murthy via cutting)
+
+108. HADOOP-1433.  Add job priority.  (Johan Oskarsson via tomwhite)
+
+109. HADOOP-1597.  Add status reports and post-upgrade options to HDFS
+     distributed upgrade.  (Konstantin Shvachko via cutting)
+
+110. HADOOP-1524.  Permit user task logs to appear as they're
+     created.  (Michael Bieniosek via cutting)
+
+111. HADOOP-1599.  Fix distcp bug on Windows.  (Senthil Subramanian via cutting)
+
+112. HADOOP-1562.  Add JVM metrics, including GC and logging stats.
+     (David Bowen via cutting)
+
+113. HADOOP-1613.  Fix "DFS Health" page to display correct time of
+     last contact.  (Dhruba Borthakur via cutting)
+
+114. HADOOP-1134.  Add optimized checksum support to HDFS.  Checksums
+     are now stored with each block, rather than as parallel files.
+     This reduces the namenode's memory requirements and increases
+     data integrity.  (Raghu Angadi via cutting)
+
+115. HADOOP-1400.  Make JobClient retry requests, so that clients can
+     survive jobtracker problems.  (omalley via cutting)
+
+116. HADOOP-1564.  Add unit tests for HDFS block-level checksums.
+     (Dhruba Borthakur via cutting)
+
+117. HADOOP-1620.  Reduce the number of abstract FileSystem methods,
+     simplifying implementations.  (cutting)
+
+118. HADOOP-1625.  Fix a "could not move files" exception in datanode.
+     (Raghu Angadi via cutting)
+
+119. HADOOP-1624.  Fix an infinite loop in datanode. (Raghu Angadi via cutting)
+
+120. HADOOP-1084.  Switch mapred file cache to use file modification
+     time instead of checksum to detect file changes, as checksums are
+     no longer easily accessed.  (Arun C Murthy via cutting)
+
+130. HADOOP-1623.  Fix an infinite loop when copying directories.
+     (Dhruba Borthakur via cutting)
+
+131. HADOOP-1603.  Fix a bug in namenode initialization where
+     default replication is sometimes reset to one on restart.
+     (Raghu Angadi via cutting)
+
+132. HADOOP-1635.  Remove hardcoded keypair name and fix launch-hadoop-cluster
+     to support later versions of ec2-api-tools.  (Stu Hood via tomwhite)
+
+133. HADOOP-1638.  Fix contrib EC2 scripts to support NAT addressing.
+     (Stu Hood via tomwhite) 
+
+134. HADOOP-1632.  Fix an IllegalArgumentException in fsck.
+     (Hairong Kuang via cutting)
+
+135. HADOOP-1619.  Fix FSInputChecker to not attempt to read past EOF.
+     (Hairong Kuang via cutting)
+
+136. HADOOP-1640.  Fix TestDecommission on Windows.
+     (Dhruba Borthakur via cutting)
+
+137. HADOOP-1587.  Fix TestSymLink to get required system properties.
+     (Devaraj Das via omalley)
+
+138. HADOOP-1628.  Add block CRC protocol unit tests. (Raghu Angadi via omalley)
+
+139. HADOOP-1653.  FSDirectory code-cleanups. FSDirectory.INode
+     becomes a static class.  (Christophe Taton via dhruba)
+
+140. HADOOP-1066.  Restructure documentation to make more user
+     friendly.  (Connie Kleinjans and Jeff Hammerbacher via cutting)
+
+141. HADOOP-1551.  libhdfs supports setting replication factor and
+     retrieving modification time of files.  (Sameer Paranjpye via dhruba)
+
+141. HADOOP-1647.  FileSystem.getFileStatus returns valid values for "/".
+     (Dhruba Borthakur via dhruba)
+
+142. HADOOP-1657.  Fix NNBench to ensure that the block size is a
+     multiple of bytes.per.checksum. (Raghu Angadi via dhruba)
+
+143. HADOOP-1553.  Replace user task output and log capture code to use shell
+     redirection instead of copier threads in the TaskTracker. Capping the
+     size of the output is now done via tail in memory and thus should not be 
+     large. The output of the tasklog servlet is not forced into UTF8 and is
+     not buffered entirely in memory. (omalley)
+     Configuration changes to hadoop-default.xml:
+       remove mapred.userlog.num.splits
+       remove mapred.userlog.purge.splits
+       change default mapred.userlog.limit.kb to 0 (no limit)
+       change default mapred.userlog.retain.hours to 24
+     Configuration changes to log4j.properties:
+       remove log4j.appender.TLA.noKeepSplits
+       remove log4j.appender.TLA.purgeLogSplits
+       remove log4j.appender.TLA.logsRetainHours
+     URL changes:
+       http://<tasktracker>/tasklog.jsp -> http://<tasktracker>/tasklog with
+         parameters limited to start and end, which may be positive (from
+         start) or negative (from end).
+     Environment:
+       require bash (v2 or later) and tail
+
+144. HADOOP-1659.  Fix a job id/job name mixup. (Arun C. Murthy via omalley)
+
+145. HADOOP-1665.  With HDFS Trash enabled, if the same file is created
+     and deleted more than once, the succeeding deletions create Trash item
+     names suffixed with an integer.  (Dhruba Borthakur via dhruba)
+
+146. HADOOP-1666.  FsShell object can be used for multiple fs commands.
+     (Dhruba Borthakur via dhruba)
+
+147. HADOOP-1654.  Remove performance regression introduced by Block CRC.
+     (Raghu Angadi via dhruba)
+
+148. HADOOP-1680.  Improvements to Block CRC upgrade messages.
+     (Raghu Angadi via dhruba)
+
+149. HADOOP-71.  Allow Text and SequenceFile Map/Reduce inputs from non-default 
+     filesystems. (omalley)
+
+150. HADOOP-1568.  Expose HDFS as xml/http filesystem to provide cross-version
+     compatibility. (Chris Douglas via omalley)
+
+151. HADOOP-1668.  Added an INCOMPATIBILITY section to CHANGES.txt. (nigel)
+
+152. HADOOP-1629.  Added an upgrade test for HADOOP-1134.
+     (Raghu Angadi via nigel)
+
+153. HADOOP-1698.  Fix performance problems on map output sorting for jobs
+     with large numbers of reduces. (Devaraj Das via omalley)
+
+154. HADOOP-1716.  Fix a Pipes wordcount example to remove the 'file:'
+     schema from its output path.  (omalley via cutting)
+
+155. HADOOP-1714.  Fix TestDFSUpgradeFromImage to work on Windows.
+     (Raghu Angadi via nigel)
+
+156. HADOOP-1663.  Return a non-zero exit code if streaming fails. (Lohit Renu
+     via omalley)
+
+157. HADOOP-1712.  Fix an unhandled exception on datanode during block
+     CRC upgrade. (Raghu Angadi via cutting)
+
+158. HADOOP-1717.  Fix TestDFSUpgradeFromImage to work on Solaris.
+     (nigel via cutting)
+
+159. HADOOP-1437.  Add Eclipse plugin in contrib.
+     (Eugene Hung and Christophe Taton via cutting)
+
+
+Release 0.13.0 - 2007-06-08
+
+ 1. HADOOP-1047.  Fix TestReplication to succeed more reliably.
+    (Hairong Kuang via cutting)
+
+ 2. HADOOP-1063.  Fix a race condition in MiniDFSCluster test code.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-1101.  In web ui, split shuffle statistics from reduce
+    statistics, and add some task averages.  (Devaraj Das via cutting)
+
+ 4. HADOOP-1071.  Improve handling of protocol version mismatch in
+    JobTracker.  (Tahir Hashmi via cutting)
+
+ 5. HADOOP-1116.  Increase heap size used for contrib unit tests.
+    (Philippe Gassmann via cutting)
+
+ 6. HADOOP-1120.  Add contrib/data_join, tools to simplify joining
+    data from multiple sources using MapReduce.  (Runping Qi via cutting)
+
+ 7. HADOOP-1064.  Reduce log level of some DFSClient messages.
+    (Dhruba Borthakur via cutting)
+
+ 8. HADOOP-1137.  Fix StatusHttpServer to work correctly when
+    resources are in a jar file.  (Benjamin Reed via cutting)
+
+ 9. HADOOP-1094.  Optimize generated Writable implementations for
+    records to not allocate a new BinaryOutputArchive or
+    BinaryInputArchive per call.  (Milind Bhandarkar via cutting)
+
+10. HADOOP-1068.  Improve error message for clusters with 0 datanodes.
+    (Dhruba Borthakur via tomwhite)
+
+11. HADOOP-1122.  Fix divide-by-zero exception in FSNamesystem
+    chooseTarget method.  (Dhruba Borthakur via tomwhite)
+
+12. HADOOP-1131.  Add a closeAll() static method to FileSystem.
+    (Philippe Gassmann via tomwhite)
+
+13. HADOOP-1085.  Improve port selection in HDFS and MapReduce test
+    code.  Ports are now selected by the OS during testing rather than
+    by probing for free ports, improving test reliability.
+    (Arun C Murthy via cutting)
+
+14. HADOOP-1153.  Fix HDFS daemons to correctly stop their threads.
+    (Konstantin Shvachko via cutting)
+
+15. HADOOP-1146.  Add a counter for reduce input keys and rename the
+    "reduce input records" counter to be "reduce input groups".
+    (David Bowen via cutting)
+
+16. HADOOP-1165.  In records, replace identical generated toString
+    methods with a method on the base class.  (Milind Bhandarkar via cutting)
+
+17. HADOOP-1164.  Fix TestReplicationPolicy to specify port zero, so
+    that a free port is automatically selected.  (omalley via cutting)
+
+18. HADOOP-1166.  Add a NullOutputFormat and use it in the
+    RandomWriter example.  (omalley via cutting)
+
+19. HADOOP-1169.  Fix a cut/paste error in CopyFiles utility so that
+    S3-based source files are correctly copied.  (Michael Stack via cutting)
+
+20. HADOOP-1167.  Remove extra synchronization in InMemoryFileSystem.
+    (omalley via cutting)
+
+21. HADOOP-1110.  Fix an off-by-one error counting map inputs.
+    (David Bowen via cutting)
+
+22. HADOOP-1178.  Fix a NullPointerException during namenode startup.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-1011.  Fix a ConcurrentModificationException when viewing
+    job history.  (Tahir Hashmi via cutting)
+
+24. HADOOP-672.  Improve help for fs shell commands.
+    (Dhruba Borthakur via cutting)
+
+25. HADOOP-1170.  Improve datanode performance by removing device
+    checks from common operations.  (Igor Bolotin via cutting)
+
+26. HADOOP-1090.  Fix SortValidator's detection of whether the input 
+    file belongs to the sort-input or sort-output directory.
+    (Arun C Murthy via tomwhite)
+
+27. HADOOP-1081.  Fix bin/hadoop on Darwin.  (Michael Bieniosek via cutting)
+
+28. HADOOP-1045.  Add contrib/hbase, a BigTable-like online database.
+    (Jim Kellerman via cutting)
+
+29. HADOOP-1156.  Fix a NullPointerException in MiniDFSCluster.
+    (Hairong Kuang via cutting)
+
+30. HADOOP-702.  Add tools to help automate HDFS upgrades.
+    (Konstantin Shvachko via cutting)
+
+31. HADOOP-1163.  Fix ganglia metrics to aggregate metrics from different
+    hosts properly.  (Michael Bieniosek via tomwhite)
+
+32. HADOOP-1194.  Make compression style record level for map output
+    compression.  (Arun C Murthy via tomwhite)
+
+33. HADOOP-1187.  Improve DFS Scalability: avoid scanning entire list of
+    datanodes in getAdditionalBlocks.  (Dhruba Borthakur via tomwhite)
+
+34. HADOOP-1133.  Add tool to analyze and debug namenode on a production
+    cluster.  (Dhruba Borthakur via tomwhite)
+
+35. HADOOP-1151.  Remove spurious printing to stderr in streaming 
+    PipeMapRed.  (Koji Noguchi via tomwhite)
+
+36. HADOOP-988.  Change namenode to use a single map of blocks to metadata.
+    (Raghu Angadi via tomwhite)
+
+37. HADOOP-1203.  Change UpgradeUtilities used by DFS tests to use
+    MiniDFSCluster to start and stop NameNode/DataNodes.
+    (Nigel Daley via tomwhite)
+
+38. HADOOP-1217.  Add test.timeout property to build.xml, so that
+    long-running unit tests may be automatically terminated.
+    (Nigel Daley via cutting)
+
+39. HADOOP-1149.  Improve DFS Scalability: make 
+    processOverReplicatedBlock() a no-op if blocks are not 
+    over-replicated.  (Raghu Angadi via tomwhite)
+
+40. HADOOP-1149.  Improve DFS Scalability: optimize getDistance(), 
+    contains(), and isOnSameRack() in NetworkTopology.  
+    (Hairong Kuang via tomwhite)
+
+41. HADOOP-1218.  Make synchronization on TaskTracker's RunningJob 
+    object consistent.  (Devaraj Das via tomwhite)
+
+42. HADOOP-1219.  Ignore progress report once a task has reported as 
+    'done'.  (Devaraj Das via tomwhite)
+
+43. HADOOP-1114.  Permit user to specify additional CLASSPATH elements
+    with a HADOOP_CLASSPATH environment variable. (cutting)
+
+44. HADOOP-1198.  Remove ipc.client.timeout parameter override from 
+    unit test configuration.  Using the default is more robust and
+    has almost the same run time.  (Arun C Murthy via tomwhite)
+
+45. HADOOP-1211.  Remove deprecated constructor and unused static 
+    members in DataNode class.  (Konstantin Shvachko via tomwhite)
+
+46. HADOOP-1136.  Fix ArrayIndexOutOfBoundsException in 
+    FSNamesystem$UnderReplicatedBlocks add() method.  
+    (Hairong Kuang via tomwhite)
+
+47. HADOOP-978.  Add the client name and the address of the node that
+    previously started to create the file to the description of 
+    AlreadyBeingCreatedException.  (Konstantin Shvachko via tomwhite)
+
+48. HADOOP-1001.  Check the type of keys and values generated by the 
+    mapper against the types specified in JobConf.  
+    (Tahir Hashmi via tomwhite)
+
+49. HADOOP-971.  Improve DFS Scalability: Improve name node performance
+    by adding a hostname to datanodes map.  (Hairong Kuang via tomwhite)
+
+50. HADOOP-1189.  Fix 'No space left on device' exceptions on datanodes.
+    (Raghu Angadi via tomwhite)
+
+51. HADOOP-819.  Change LineRecordWriter to not insert a tab between
+    key and value when either is null, and to print nothing when both
+    are null.  (Runping Qi via cutting)
+
+52. HADOOP-1204.  Rename InputFormatBase to be FileInputFormat, and
+    deprecate InputFormatBase.  Also make LineRecordReader easier to
+    extend.  (Runping Qi via cutting)
+
+53. HADOOP-1213.  Improve logging of errors by IPC server, to
+    consistently include the service name and the call.  (cutting)
+
+54. HADOOP-1238.  Fix metrics reporting by TaskTracker to correctly
+    track maps_running and reduces_running.
+    (Michael Bieniosek via cutting)
+
+55. HADOOP-1093.  Fix a race condition in HDFS where blocks were
+    sometimes erased before they were reported written.
+    (Dhruba Borthakur via cutting)
+
+56. HADOOP-1239.  Add a package name to some testjar test classes.
+    (Jim Kellerman via cutting)
+
+57. HADOOP-1241.  Fix NullPointerException in processReport when 
+    namenode is restarted.  (Dhruba Borthakur via tomwhite)
+
+58. HADOOP-1244.  Fix stop-dfs.sh to no longer incorrectly specify 
+    slaves file for stopping datanode.  
+    (Michael Bieniosek via tomwhite)
+
+59. HADOOP-1253.  Fix ConcurrentModificationException and 
+    NullPointerException in JobControl.  
+    (Johan Oskarson via tomwhite)
+
+60. HADOOP-1256.  Fix NameNode so that multiple DataNodeDescriptors
+    can no longer be created on startup.  (Hairong Kuang via cutting)
+
+61. HADOOP-1214.  Replace streaming classes with new counterparts 
+    from Hadoop core.  (Runping Qi via tomwhite)
+
+62. HADOOP-1250.  Move a chmod utility from streaming to FileUtil.
+    (omalley via cutting)
+
+63. HADOOP-1258.  Fix TestCheckpoint test case to wait for 
+    MiniDFSCluster to be active.  (Nigel Daley via tomwhite)
+
+64. HADOOP-1148.  Re-indent all Java source code to consistently use
+    two spaces per indent level.  (cutting)
+
+65. HADOOP-1251.  Add a method to Reporter to get the map InputSplit.
+    (omalley via cutting)
+
+66. HADOOP-1224.  Fix "Browse the filesystem" link to no longer point 
+    to dead datanodes.  (Enis Soztutar via tomwhite)
+
+67. HADOOP-1154.  Fail a streaming task if the threads reading from or 
+    writing to the streaming process fail.  (Koji Noguchi via tomwhite)
+
+68. HADOOP-968.  Move shuffle and sort to run in reduce's child JVM,
+    rather than in TaskTracker.  (Devaraj Das via cutting)
+
+69. HADOOP-1111.  Add support for client notification of job
+    completion. If the job configuration has a job.end.notification.url
+    property it will make a HTTP GET request to the specified URL.
+    The number of retries and the interval between retries is also
+    configurable. (Alejandro Abdelnur via tomwhite)
+
+70. HADOOP-1275.  Fix misspelled job notification property in
+    hadoop-default.xml.  (Alejandro Abdelnur via tomwhite)
+
+71. HADOOP-1152.  Fix race condition in MapOutputCopier.copyOutput file
+    rename causing possible reduce task hang.
+    (Tahir Hashmi via tomwhite)
+
+72. HADOOP-1050.  Distinguish between failed and killed tasks so as to 
+    not count a lost tasktracker against the job.  
+    (Arun C Murthy via tomwhite)
+
+73. HADOOP-1271.  Fix StreamBaseRecordReader to be able to log record 
+    data that's not UTF-8.  (Arun C Murthy via tomwhite)
+
+74. HADOOP-1190.  Fix unchecked warnings in main Hadoop code.  
+    (tomwhite)
+
+75. HADOOP-1127.  Fix AlreadyBeingCreatedException in namenode for 
+    jobs run with speculative execution.
+    (Arun C Murthy via tomwhite)
+
+76. HADOOP-1282.  Omnibus HBase patch.  Improved tests & configuration.
+    (Jim Kellerman via cutting)
+
+77. HADOOP-1262.  Make dfs client try to read from a different replica 
+    of the checksum file when a checksum error is detected.  
+    (Hairong Kuang via tomwhite)
+
+78. HADOOP-1279.  Fix JobTracker to maintain list of recently
+    completed jobs by order of completion, not submission.
+    (Arun C Murthy via cutting)
+
+79. HADOOP-1284.  In contrib/streaming, permit flexible specification
+    of field delimiter and fields for partitioning and sorting.
+    (Runping Qi via cutting)
+
+80. HADOOP-1176.  Fix a bug where reduce would hang when a map had
+    more than 2GB of output for it.  (Arun C Murthy via cutting)
+
+81. HADOOP-1293.  Fix contrib/streaming to print more than the first
+    twenty lines of standard error.  (Koji Noguchi via cutting)
+
+82. HADOOP-1297.  Fix datanode so that requests to remove blocks that
+    do not exist no longer cause block reports to be re-sent every
+    second.  (Dhruba Borthakur via cutting)
+
+83. HADOOP-1216.  Change MapReduce so that, when numReduceTasks is
+    zero, map outputs are written directly as final output, skipping
+    shuffle, sort and reduce.  Use this to implement reduce=NONE
+    option in contrib/streaming.  (Runping Qi via cutting)
+
+84. HADOOP-1294.  Fix unchecked warnings in main Hadoop code under 
+    Java 6.  (tomwhite)
+
+85. HADOOP-1299.  Fix so that RPC will restart after RPC.stopClient()
+    has been called.  (Michael Stack via cutting)
+
+86. HADOOP-1278.  Improve blacklisting of TaskTrackers by JobTracker,
+    to reduce false positives.  (Arun C Murthy via cutting)
+
+87. HADOOP-1290.  Move contrib/abacus into mapred/lib/aggregate.
+    (Runping Qi via cutting)
+
+88. HADOOP-1272.  Extract inner classes from FSNamesystem into separate 
+    classes.  (Dhruba Borthakur via tomwhite)
+
+89. HADOOP-1247.  Add support to contrib/streaming for aggregate
+    package, formerly called Abacus.  (Runping Qi via cutting)
+
+90. HADOOP-1061.  Fix bug in listing files in the S3 filesystem.
+    NOTE: this change is not backwards compatible!  You should use the 
+    MigrationTool supplied to migrate existing S3 filesystem data to 
+    the new format.  Please back up your data first before upgrading
+    (using 'hadoop distcp' for example).  (tomwhite)
+
+91. HADOOP-1304.  Make configurable the maximum number of task
+    attempts before a job fails.  (Devaraj Das via cutting)
+
+92. HADOOP-1308.  Use generics to restrict types when classes are
+    passed as parameters to JobConf methods. (Michael Bieniosek via cutting)
+
+93. HADOOP-1312.  Fix a ConcurrentModificationException in NameNode
+    that killed the heartbeat monitoring thread.
+    (Dhruba Borthakur via cutting)
+
+94. HADOOP-1315.  Clean up contrib/streaming, switching it to use core
+    classes more and removing unused code.  (Runping Qi via cutting)
+
+95. HADOOP-485.  Allow a different comparator for grouping keys in
+    calls to reduce.  (Tahir Hashmi via cutting)
+
+96. HADOOP-1322.  Fix TaskTracker blacklisting to work correctly in
+    one- and two-node clusters.  (Arun C Murthy via cutting)
+
+97. HADOOP-1144.  Permit one to specify a maximum percentage of tasks
+    that can fail before a job is aborted.  The default is zero.
+    (Arun C Murthy via cutting)
+
+98. HADOOP-1184.  Fix HDFS decommissioning to complete when the only
+    copy of a block is on a decommissioned node. (Dhruba Borthakur via cutting)
+
+99. HADOOP-1263.  Change DFSClient to retry certain namenode calls
+    with a random, exponentially increasing backoff time, to avoid
+    overloading the namenode on, e.g., job start.  (Hairong Kuang via cutting)
+
+100. HADOOP-1325.  First complete, functioning version of HBase.
+    (Jim Kellerman via cutting)
+
+101. HADOOP-1276.  Make tasktracker expiry interval configurable.
+    (Arun C Murthy via cutting)
+
+102. HADOOP-1326.  Change JobClient#RunJob() to return the job.
+    (omalley via cutting)
+
+103. HADOOP-1270.  Randomize the fetch of map outputs, speeding the
+     shuffle.  (Arun C Murthy via cutting)
+
+104. HADOOP-1200.  Restore disk checking lost in HADOOP-1170.
+     (Hairong Kuang via cutting)
+
+105. HADOOP-1252.  Changed MapReduce's allocation of local files to
+     use round-robin among available devices, rather than a hashcode.
+     More care is also taken to not allocate files on full or offline
+     drives.  (Devaraj Das via cutting)
+
+106. HADOOP-1324.  Change so that an FSError kills only the task that
+     generates it rather than the entire task tracker.
+     (Arun C Murthy via cutting)
+
+107. HADOOP-1310.  Fix unchecked warnings in aggregate code.  (tomwhite)
+
+108. HADOOP-1255.  Fix a bug where the namenode falls into an infinite
+     loop trying to remove a dead node.  (Hairong Kuang via cutting)
+
+109. HADOOP-1160.  Fix DistributedFileSystem.close() to close the
+     underlying FileSystem, correctly aborting files being written.
+     (Hairong Kuang via cutting)
+
+110. HADOOP-1341.  Fix intermittent failures in HBase unit tests
+     caused by deadlock.  (Jim Kellerman via cutting)
+
+111. HADOOP-1350.  Fix shuffle performance problem caused by forcing
+     chunked encoding of map outputs.  (Devaraj Das via cutting)
+
+112. HADOOP-1345.  Fix HDFS to correctly retry another replica when a
+     checksum error is encountered.  (Hairong Kuang via cutting)
+
+113. HADOOP-1205.  Improve synchronization around HDFS block map.
+     (Hairong Kuang via cutting)
+
+114. HADOOP-1353.  Fix a potential NullPointerException in namenode.
+     (Dhruba Borthakur via cutting)
+
+115. HADOOP-1354.  Fix a potential NullPointerException in FsShell.
+     (Hairong Kuang via cutting)
+
+116. HADOOP-1358.  Fix a potential bug when DFSClient calls skipBytes.
+     (Hairong Kuang via cutting)
+
+117. HADOOP-1356.  Fix a bug in ValueHistogram.  (Runping Qi via cutting)
+
+118. HADOOP-1363.  Fix locking bug in JobClient#waitForCompletion().
+     (omalley via cutting)
+
+119. HADOOP-1368.  Fix inconsistent synchronization in JobInProgress.
+     (omalley via cutting)
+
+120. HADOOP-1369.  Fix inconsistent synchronization in TaskTracker.
+     (omalley via cutting)
+
+121. HADOOP-1361.  Fix various calls to skipBytes() to check return
+     value. (Hairong Kuang via cutting)
+
+122. HADOOP-1388.  Fix a potential NullPointerException in web ui.
+     (Devaraj Das via cutting)
+
+123. HADOOP-1385.  Fix MD5Hash#hashCode() to generally hash to more
+     than 256 values.  (omalley via cutting)
+
+124. HADOOP-1386.  Fix Path to not permit the empty string as a
+     path, as this has led to accidental file deletion.  Instead
+     force applications to use "." to name the default directory.
+     (Hairong Kuang via cutting)
+
+125. HADOOP-1407.  Fix integer division bug in JobInProgress which
+     meant failed tasks didn't cause the job to fail.
+     (Arun C Murthy via tomwhite)
+
+126. HADOOP-1427.  Fix a typo that caused GzipCodec to incorrectly use
+     a very small input buffer.  (Espen Amble Kolstad via cutting)
+
+127. HADOOP-1435.  Fix globbing code to no longer use the empty string
+     to indicate the default directory, per HADOOP-1386.
+     (Hairong Kuang via cutting)
+
+128. HADOOP-1411.  Make task retry framework handle 
+     AlreadyBeingCreatedException when wrapped as a RemoteException.
+     (Hairong Kuang via tomwhite)
+
+129. HADOOP-1242.  Improve handling of DFS upgrades.
+     (Konstantin Shvachko via cutting)
+
+130. HADOOP-1332.  Fix so that TaskTracker exits reliably during unit
+     tests on Windows.  (omalley via cutting)
+
+131. HADOOP-1431.  Fix sort progress reporting during map so that it runs
+     only while sorting, ensuring that stuck maps are correctly terminated.
+     (Devaraj Das and Arun C Murthy via cutting)
+
+132. HADOOP-1452.  Change TaskTracker.MapOutputServlet.doGet.totalRead
+     to a long, permitting map outputs to exceed 2^31 bytes.
+     (omalley via cutting)
+
+133. HADOOP-1443.  Fix a bug opening zero-length files in HDFS.
+     (Konstantin Shvachko via cutting)
+
+
+Release 0.12.3 - 2007-04-06
+
+ 1. HADOOP-1162.  Fix bug in record CSV and XML serialization of
+    binary values.  (Milind Bhandarkar via cutting)
+
+ 2. HADOOP-1123.  Fix NullPointerException in LocalFileSystem when
+    trying to recover from a checksum error.
+    (Hairong Kuang & Nigel Daley via tomwhite)
+
+ 3. HADOOP-1177.  Fix bug where IOException in MapOutputLocation.getFile
+    was not being logged.  (Devaraj Das via tomwhite)
+
+ 4. HADOOP-1175.  Fix bugs in JSP for displaying a task's log messages.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-1191.  Fix map tasks to wait until sort progress thread has
+    stopped before reporting the task done.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1192.  Fix an integer overflow bug in FSShell's 'dus'
+    command and a performance problem in HDFS's implementation of it.
+    (Hairong Kuang via cutting)
+
+ 7. HADOOP-1105. Fix reducers to make "progress" while iterating 
+    through values.  (Devaraj Das & Owen O'Malley via tomwhite)
+
+ 8. HADOOP-1179. Make Task Tracker close index file as soon as the read 
+    is done when serving get-map-output requests.  
+    (Devaraj Das via tomwhite)
+
+
+Release 0.12.2 - 2007-03-23
+
+ 1. HADOOP-1135.  Fix bug in block report processing which may cause
+    the namenode to delete blocks.  (Dhruba Borthakur via tomwhite)
+
+ 2. HADOOP-1145.  Make XML serializer and deserializer classes public
+    in record package.  (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1140.  Fix a deadlock in metrics. (David Bowen via cutting)
+
+ 4. HADOOP-1150.  Fix streaming -reducer and -mapper to give them
+    defaults. (Owen O'Malley via tomwhite)
+
+
+Release 0.12.1 - 2007-03-17
+
+ 1. HADOOP-1035.  Fix a StackOverflowError in FSDataSet.
+    (Raghu Angadi via cutting)
+
+ 2. HADOOP-1053.  Fix VInt representation of negative values.  Also
+    remove references in generated record code to methods outside of
+    the record package and improve some record documentation.
+    (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1067.  Compile fails if Checkstyle jar is present in lib
+    directory. Also remove dependency on a particular Checkstyle
+    version number. (tomwhite)
+
+ 4. HADOOP-1060.  Fix an IndexOutOfBoundsException in the JobTracker
+    that could cause jobs to hang.  (Arun C Murthy via cutting)
+
+ 5. HADOOP-1077.  Fix a race condition fetching map outputs that could
+    hang reduces.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1083.  Fix so that when a cluster restarts with a missing
+    datanode, its blocks are replicated.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-1082.  Fix a NullPointerException in ChecksumFileSystem.
+    (Hairong Kuang via cutting)
+
+ 8. HADOOP-1088.  Fix record serialization of negative values.
+    (Milind Bhandarkar via cutting)
+
+ 9. HADOOP-1080.  Fix bug in bin/hadoop on Windows when native
+    libraries are present.  (ab via cutting)
+
+10. HADOOP-1091.  Fix a NullPointerException in MetricsRecord.
+    (David Bowen via tomwhite)
+
+11. HADOOP-1092.  Fix a NullPointerException in HeartbeatMonitor
+    thread. (Hairong Kuang via tomwhite)
+
+12. HADOOP-1112.  Fix a race condition in Hadoop metrics.
+    (David Bowen via tomwhite)
+
+13. HADOOP-1108.  Checksummed file system should retry reading if a
+    different replica is found when handling ChecksumException.
+    (Hairong Kuang via tomwhite)
+
+14. HADOOP-1070.  Fix a problem with number of racks and datanodes
+    temporarily doubling.  (Konstantin Shvachko via tomwhite)
+
+15. HADOOP-1099.  Fix NullPointerException in JobInProgress.
+    (Gautam Kowshik via tomwhite)
+
+16. HADOOP-1115.  Fix bug where FsShell copyToLocal doesn't
+    copy directories.  (Hairong Kuang via tomwhite)
+
+17. HADOOP-1109.  Fix NullPointerException in StreamInputFormat.
+    (Koji Noguchi via tomwhite)
+
+18. HADOOP-1117.  Fix DFS scalability: when the namenode is
+    restarted it consumes 80% CPU. (Dhruba Borthakur via
+    tomwhite)
+
+19. HADOOP-1089.  Make the C++ version of write and read v-int
+    agree with the Java versions.  (Milind Bhandarkar via
+    tomwhite)
+
+20. HADOOP-1096.  Rename InputArchive and OutputArchive and
+    make them public. (Milind Bhandarkar via tomwhite)
+
+21. HADOOP-1128.  Fix missing progress information in map tasks.
+    (Espen Amble Kolstad, Andrzej Bialecki, and Owen O'Malley
+    via tomwhite)
+
+22. HADOOP-1129.  Fix DFSClient to not hide IOExceptions in
+    flush method.  (Hairong Kuang via tomwhite)
+
+23. HADOOP-1126.  Optimize CPU usage for under replicated blocks
+    when cluster restarts.  (Hairong Kuang via tomwhite)
+
+
+Release 0.12.0 - 2007-03-02
+
+ 1. HADOOP-975.  Separate stdout and stderr from tasks.
+    (Arun C Murthy via cutting)
+
+ 2. HADOOP-982.  Add some setters and a toString() method to
+    BytesWritable.  (omalley via cutting)
+
+ 3. HADOOP-858.  Move contrib/smallJobsBenchmark to src/test, removing
+    obsolete bits. (Nigel Daley via cutting)
+
+ 4. HADOOP-992.  Fix MiniMR unit tests to use MiniDFS when specified,
+    rather than the local FS.  (omalley via cutting)
+
+ 5. HADOOP-954.  Change use of metrics to use callback mechanism.
+    Also rename utility class Metrics to MetricsUtil.
+    (David Bowen & Nigel Daley via cutting)
+
+ 6. HADOOP-893.  Improve HDFS client's handling of dead datanodes.
+    The set is no longer reset with each block, but rather is now
+    maintained for the life of an open file.  (Raghu Angadi via cutting)
+
+ 7. HADOOP-882.  Upgrade to jets3t version 0.5, used by the S3
+    FileSystem.  This version supports retries.  (Michael Stack via cutting)
+
+ 8. HADOOP-977.  Send task's stdout and stderr to JobClient's stdout
+    and stderr respectively, with each line tagged by the task's name.
+    (Arun C Murthy via cutting)
+
+ 9. HADOOP-761.  Change unit tests to not use /tmp.  (Nigel Daley via cutting)
+
+10. HADOOP-1007. Make names of metrics used in Hadoop unique.
+    (Nigel Daley via cutting)
+
+11. HADOOP-491.  Change mapred.task.timeout to be per-job, and make a
+    value of zero mean no timeout.  Also change contrib/streaming to
+    disable task timeouts.  (Arun C Murthy via cutting)
+
+12. HADOOP-1010.  Add Reporter.NULL, a Reporter implementation that
+    does nothing.  (Runping Qi via cutting)
+
+13. HADOOP-923.  In HDFS NameNode, move replication computation to a
+    separate thread, to improve heartbeat processing time.
+    (Dhruba Borthakur via cutting) 
+
+14. HADOOP-476.  Rewrite contrib/streaming command-line processing,
+    improving parameter validation.  (Sanjay Dahiya via cutting)
+
+15. HADOOP-973.  Improve error messages in Namenode.  This should help
+    to track down a problem that was appearing as a
+    NullPointerException.  (Dhruba Borthakur via cutting) 
+
+16. HADOOP-649.  Fix so that jobs with no tasks are not lost.
+    (Thomas Friol via cutting)
+
+17. HADOOP-803.  Reduce memory use by HDFS namenode, phase I.
+    (Raghu Angadi via cutting)
+
+18. HADOOP-1021.  Fix MRCaching-based unit tests on Windows.
+    (Nigel Daley via cutting)
+
+19. HADOOP-889.  Remove duplicate code from HDFS unit tests.
+    (Milind Bhandarkar via cutting)
+
+20. HADOOP-943.  Improve HDFS's fsck command to display the filename
+    for under-replicated blocks.  (Dhruba Borthakur via cutting) 
+
+21. HADOOP-333.  Add validator for sort benchmark output.
+    (Arun C Murthy via cutting)
+
+22. HADOOP-947.  Improve performance of datanode decommissioning.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-442.  Permit one to specify hosts allowed to connect to
+    namenode and jobtracker with include and exclude files.  (Wendy
+    Chien via cutting)
+
+24. HADOOP-1017.  Cache constructors, for improved performance.
+    (Ron Bodkin via cutting)
+
+25. HADOOP-867.  Move split creation out of JobTracker to client.
+    Splits are now saved in a separate file, read by task processes
+    directly, so that user code is no longer required in the
+    JobTracker.  (omalley via cutting)
+
+26. HADOOP-1006.  Remove obsolete '-local' option from test code.
+    (Gautam Kowshik via cutting)
+
+27. HADOOP-952. Create a public (shared) Hadoop EC2 AMI.
+    The EC2 scripts now support launch of public AMIs.
+    (tomwhite)
+    
+28. HADOOP-1025. Remove some obsolete code in ipc.Server.  (cutting)
+
+29. HADOOP-997. Implement S3 retry mechanism for failed block
+    transfers. This includes a generic retry mechanism for use
+    elsewhere in Hadoop. (tomwhite)
+
+30. HADOOP-990.  Improve HDFS support for full datanode volumes.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-564.  Replace uses of "dfs://" URIs with the more standard
+    "hdfs://".  (Wendy Chien via cutting)
+
+32. HADOOP-1030.  In unit tests, unify setting of ipc.client.timeout.
+    Also increase the value used from one to two seconds, in hopes of
+    making tests complete more reliably.  (cutting)
+
+33. HADOOP-654.  Stop assigning tasks to a tasktracker if it has
+    failed more than a specified number in the job.
+    (Arun C Murthy via cutting)
+
+34. HADOOP-985.  Change HDFS to identify nodes by IP address rather
+    than by DNS hostname.  (Raghu Angadi via cutting)
+
+35. HADOOP-248.  Optimize location of map outputs to not use random
+    probes.  (Devaraj Das via cutting)
+
+36. HADOOP-1029.  Fix streaming's input format to correctly seek to
+    the start of splits.  (Arun C Murthy via cutting)
+
+37. HADOOP-492.  Add per-job and per-task counters.  These are
+    incremented via the Reporter interface and available through the
+    web ui and the JobClient API.  The mapreduce framework maintains a
+    few basic counters, and applications may add their own.  Counters
+    are also passed to the metrics system.
+    (David Bowen via cutting)
+
+38. HADOOP-1034.  Fix datanode to better log exceptions.
+    (Philippe Gassmann via cutting)
+
+39. HADOOP-878.  In contrib/streaming, fix reducer=NONE to work with
+    multiple maps.  (Arun C Murthy via cutting)
+
+40. HADOOP-1039.  In HDFS's TestCheckpoint, avoid restarting
+    MiniDFSCluster so often, speeding this test.  (Dhruba Borthakur via cutting)
+
+41. HADOOP-1040.  Update RandomWriter example to use counters and
+    user-defined input and output formats.  (omalley via cutting)
+
+42. HADOOP-1027.  Fix problems with in-memory merging during shuffle
+    and re-enable this optimization.  (Devaraj Das via cutting)
+
+43. HADOOP-1036.  Fix exception handling in TaskTracker to keep tasks
+    from being lost.  (Arun C Murthy via cutting)
+
+44. HADOOP-1042.  Improve the handling of failed map output fetches.
+    (Devaraj Das via cutting)
+
+45. HADOOP-928.  Make checksums optional per FileSystem.
+    (Hairong Kuang via cutting)
+
+46. HADOOP-1044.  Fix HDFS's TestDecommission to not spuriously fail.
+    (Wendy Chien via cutting)
+
+47. HADOOP-972.  Optimize HDFS's rack-aware block placement algorithm.
+    (Hairong Kuang via cutting)
+
+48. HADOOP-1043.  Optimize shuffle, increasing parallelism.
+    (Devaraj Das via cutting)
+
+49. HADOOP-940.  Improve HDFS's replication scheduling.
+    (Dhruba Borthakur via cutting) 
+
+50. HADOOP-1020.  Fix a bug in Path resolution, and a problem with unit
+    tests on Windows.  (cutting)
+
+51. HADOOP-941.  Enhance record facility.
+    (Milind Bhandarkar via cutting)
+
+52. HADOOP-1000.  Fix so that log messages in task subprocesses are
+    not written to a task's standard error.  (Arun C Murthy via cutting)
+
+53. HADOOP-1037.  Fix bin/slaves.sh, which currently only works with
+    /bin/bash, to specify /bin/bash rather than /bin/sh.  (cutting)
+
+54. HADOOP-1046. Clean up partially received stale block files from tmp. (ab)
+
+55. HADOOP-1041.  Optimize mapred counter implementation.  Also group
+    counters by their declaring Enum.  (David Bowen via cutting)
+
+56. HADOOP-1032.  Permit one to specify jars that will be cached
+    across multiple jobs.  (Gautam Kowshik via cutting)
+
+57. HADOOP-1051.  Add optional checkstyle task to build.xml.  To use
+    this, developers must download the (LGPL'd) checkstyle jar
+    themselves.  (tomwhite via cutting)
+
+58. HADOOP-1049.  Fix a race condition in IPC client.
+    (Devaraj Das via cutting)
+
+60. HADOOP-1056.  Check HDFS include/exclude node lists with both IP
+    address and hostname.  (Wendy Chien via cutting)
+
+61. HADOOP-994.  In HDFS, limit the number of blocks invalidated at
+    once.  Large lists were causing datanodes to time out.
+    (Dhruba Borthakur via cutting) 
+
+62. HADOOP-432.  Add a trash feature, disabled by default.  When
+    enabled, the FSShell 'rm' command will move things to a trash
+    directory in the filesystem.  In HDFS, a thread periodically
+    checkpoints the trash and removes old checkpoints.  (cutting)
+
+
+Release 0.11.2 - 2007-02-16
+
+ 1. HADOOP-1009.  Fix an infinite loop in the HDFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-1014.  Disable in-memory merging during shuffle, as this is
+    causing data corruption.  (Devaraj Das via cutting)
+
+
+Release 0.11.1 - 2007-02-09
+
+ 1. HADOOP-976.  Make SequenceFile.Metadata public.  (Runping Qi via cutting)
+
+ 2. HADOOP-917.  Fix a NullPointerException in SequenceFile's merger
+    with large map outputs.  (omalley via cutting)
+
+ 3. HADOOP-984.  Fix a bug in shuffle error handling introduced by
+    HADOOP-331.  If a map output is unavailable, the job tracker is
+    once more informed.  (Arun C Murthy via cutting)
+
+ 4. HADOOP-987.  Fix a problem in HDFS where blocks were not removed
+    from neededReplications after a replication target was selected.
+    (Hairong Kuang via cutting)
+
+Release 0.11.0 - 2007-02-02
+
+ 1. HADOOP-781.  Remove methods deprecated in 0.10 that are no longer
+    widely used.  (cutting)
+
+ 2. HADOOP-842.  Change HDFS protocol so that the open() method is
+    passed the client hostname, to permit the namenode to order block
+    locations on the basis of network topology.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-852.  Add an ant task to compile record definitions, and
+    use it to compile record unit tests.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-757.  Fix "Bad File Descriptor" exception in HDFS client
+    when an output file is closed twice.  (Raghu Angadi via cutting)
+
+ 5. [ intentionally blank ]
+
+ 6. HADOOP-890.  Replace dashes in metric names with underscores,
+    for better compatibility with some monitoring systems.
+    (Nigel Daley via cutting)
+
+ 7. HADOOP-801.  Add to jobtracker a log of task completion events.
+    (Sanjay Dahiya via cutting)
+
+ 8. HADOOP-855.  In HDFS, try to repair files with checksum errors.
+    An exception is still thrown, but corrupt blocks are now removed
+    when they have replicas.  (Wendy Chien via cutting)
+
+ 9. HADOOP-886.  Reduce number of timer threads created by metrics API
+    by pooling contexts.  (Nigel Daley via cutting)
+
+10. HADOOP-897.  Add a "javac.args" property to build.xml that permits
+    one to pass arbitrary options to javac. (Milind Bhandarkar via cutting)
+
+11. HADOOP-899.  Update libhdfs for changes in HADOOP-871.
+    (Sameer Paranjpye via cutting)
+
+12. HADOOP-905.  Remove some dead code from JobClient.  (cutting)
+
+13. HADOOP-902.  Fix a NullPointerException in HDFS client when
+    closing output streams.  (Raghu Angadi via cutting)
+
+14. HADOOP-735.  Switch generated record code to use BytesWritable to
+    represent fields of type 'buffer'. (Milind Bhandarkar via cutting)
+
+15. HADOOP-830.  Improve mapreduce merge performance by buffering and
+    merging multiple map outputs as they arrive at reduce nodes before
+    they're written to disk.  (Devaraj Das via cutting)
+
+16. HADOOP-908.  Add a new contrib package, Abacus, that simplifies
+    counting and aggregation, built on MapReduce.  (Runping Qi via cutting)
+
+17. HADOOP-901.  Add support for recursive renaming to the S3 filesystem.
+    (Tom White via cutting)
+
+18. HADOOP-912.  Fix a bug in TaskTracker.isIdle() that was
+    sporadically causing unit test failures.  (Arun C Murthy via cutting)
+
+19. HADOOP-909.  Fix the 'du' command to correctly compute the size of
+    FileSystem directory trees.  (Hairong Kuang via cutting)
+
+20. HADOOP-731.  When a checksum error is encountered on a file stored
+    in HDFS, try another replica of the data, if any.
+    (Wendy Chien via cutting)
+
+21. HADOOP-732.  Add support to SequenceFile for arbitrary metadata,
+    as a set of attribute value pairs.  (Runping Qi via cutting)
+
+22. HADOOP-929.  Fix PhasedFileSystem to pass configuration to
+    underlying FileSystem.  (Sanjay Dahiya via cutting)
+
+23. HADOOP-935.  Fix contrib/abacus to not delete pre-existing output
+    files, but rather to fail in this case.  (Runping Qi via cutting)
+
+24. HADOOP-936.  More metric renamings, as in HADOOP-890.
+    (Nigel Daley via cutting)
+
+25. HADOOP-856.  Fix HDFS's fsck command to not report that
+    non-existent filesystems are healthy.  (Milind Bhandarkar via cutting)
+
+26. HADOOP-602.  Remove the dependency on Lucene's PriorityQueue
+    utility, by copying it into Hadoop.  This facilitates using Hadoop
+    with different versions of Lucene without worrying about CLASSPATH
+    order.  (Milind Bhandarkar via cutting)
+
+27. [ intentionally blank ]
+
+28. HADOOP-227.  Add support for backup namenodes, which periodically
+    get snapshots of the namenode state.  (Dhruba Borthakur via cutting) 
+
+29. HADOOP-884.  Add scripts in contrib/ec2 to facilitate running
+    Hadoop on an Amazon's EC2 cluster.  (Tom White via cutting)
+
+30. HADOOP-937.  Change the namenode to request re-registration of
+    datanodes in more circumstances.  (Hairong Kuang via cutting)
+
+31. HADOOP-922.  Optimize small forward seeks in HDFS.  If data is
+    likely already in flight, skip ahead rather than re-opening the
+    block.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-961.  Add a 'job -events' sub-command that prints job
+    events, including task completions and failures.  (omalley via cutting)
+
+33. HADOOP-959.  Fix namenode snapshot code added in HADOOP-227 to
+    work on Windows.  (Dhruba Borthakur via cutting)
+
+34. HADOOP-934.  Fix TaskTracker to catch metrics exceptions that were
+    causing heartbeats to fail.  (Arun Murthy via cutting)
+
+35. HADOOP-881.  Fix JobTracker web interface to display the correct
+    number of task failures.  (Sanjay Dahiya via cutting)
+
+36. HADOOP-788.  Change contrib/streaming to subclass TextInputFormat,
+    permitting it to take advantage of native compression facilities.
+    (Sanjay Dahiya via cutting)
+
+37. HADOOP-962.  In contrib/ec2: make scripts executable in tar file;
+    add a README; make the environment file use a template.
+    (Tom White via cutting)
+
+38. HADOOP-549.  Fix a NullPointerException in TaskReport's
+    serialization.  (omalley via cutting)
+
+39. HADOOP-963.  Fix remote exceptions to have the stack trace of the
+    caller thread, not the IPC listener thread.  (omalley via cutting)
+
+40. HADOOP-967.  Change RPC clients to start sending a version header.
+    (omalley via cutting)
+
+41. HADOOP-964.  Fix a bug introduced by HADOOP-830 where jobs failed
+    whose comparators and/or i/o types were in the job's jar.
+    (Dennis Kubes via cutting)
+
+42. HADOOP-969.  Fix a deadlock in JobTracker.  (omalley via cutting)
+
+43. HADOOP-862.  Add support for the S3 FileSystem to the CopyFiles
+    tool.  (Michael Stack via cutting)
+
+44. HADOOP-965.  Fix IsolationRunner so that job's jar can be found.
+    (Dennis Kubes via cutting)
+
+45. HADOOP-309.  Fix two NullPointerExceptions in StatusHttpServer.
+    (navychen via cutting)
+
+46. HADOOP-692.  Add rack awareness to HDFS's placement of blocks.
+    (Hairong Kuang via cutting)
+
+
+Release 0.10.1 - 2007-01-10
+
+ 1. HADOOP-857.  Fix S3 FileSystem implementation to permit its use
+    for MapReduce input and output.  (Tom White via cutting)
+
+ 2. HADOOP-863.  Reduce logging verbosity introduced by HADOOP-813.
+    (Devaraj Das via cutting)
+
+ 3. HADOOP-815.  Fix memory leaks in JobTracker. (Arun C Murthy via cutting)
+
+ 4. HADOOP-600.  Fix a race condition in JobTracker.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-864.  Fix 'bin/hadoop -jar' to operate correctly when
+    hadoop.tmp.dir does not yet exist.  (omalley via cutting)
+
+ 6. HADOOP-866.  Fix 'dfs -get' command to remove existing crc files,
+    if any.  (Milind Bhandarkar via cutting)
+
+ 7. HADOOP-871.  Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH.
+    (Arun C Murthy via cutting)
+
+ 8. HADOOP-868.  Decrease the number of open files during map,
+    respecting io.sort.factor.  (Devaraj Das via cutting)
+
+ 9. HADOOP-865.  Fix S3 FileSystem so that partially created files can
+    be deleted.  (Tom White via cutting)
+
+10. HADOOP-873.	 Pass java.library.path correctly to child processes.
+    (omalley via cutting)
+
+11. HADOOP-851.  Add support for the LZO codec.  This is much faster
+    than the default, zlib-based compression, but it is only available
+    when the native library is built.  (Arun C Murthy via cutting)
+
+12. HADOOP-880.  Fix S3 FileSystem to remove directories.
+    (Tom White via cutting)
+
+13. HADOOP-879.  Fix InputFormatBase to handle output generated by
+    MapFileOutputFormat.  (cutting)
+
+14. HADOOP-659.  In HDFS, prioritize replication of blocks based on
+    current replication level.  Blocks which are severely
+    under-replicated should be further replicated before blocks which
+    are less under-replicated.  (Hairong Kuang via cutting)
+
+15. HADOOP-726.  Deprecate FileSystem locking methods.  They are not
+    currently usable.  Locking should eventually be provided as an
+    independent service.  (Raghu Angadi via cutting)
+
+16. HADOOP-758.  Fix exception handling during reduce so that root
+    exceptions are not masked by exceptions in cleanups.
+    (Raghu Angadi via cutting)
+
+
+Release 0.10.0 - 2007-01-05
+
+ 1. HADOOP-763. Change DFS namenode benchmark to not use MapReduce.
+    (Nigel Daley via cutting)
+
+ 2. HADOOP-777. Use fully-qualified hostnames for tasktrackers and
+    datanodes.  (Mahadev Konar via cutting)
+
+ 3. HADOOP-621. Change 'dfs -cat' to exit sooner when output has been
+    closed.  (Dhruba Borthakur via cutting) 
+
+ 4. HADOOP-752. Rationalize some synchronization in DFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-629. Fix RPC services to better check the protocol name and
+    version.  (omalley via cutting)
+
+ 6. HADOOP-774. Limit the number of invalid blocks returned with
+    heartbeats by the namenode to datanodes.  Transmitting and
+    processing very large invalid block lists can tie up both the
+    namenode and datanode for too long.  (Dhruba Borthakur via cutting) 
+
+ 7. HADOOP-738. Change 'dfs -get' command to not create CRC files by
+    default, adding a -crc option to force their creation.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-676. Improved exceptions and error messages for common job
+    input specification errors.  (Sanjay Dahiya via cutting)
+
+ 9. [Included in 0.9.2 release]
+
+10. HADOOP-756. Add new dfsadmin option to wait for filesystem to be
+    operational.  (Dhruba Borthakur via cutting)
+
+11. HADOOP-770. Fix jobtracker web interface to display, on restart,
+    jobs that were running when it was last stopped.
+    (Sanjay Dahiya via cutting)
+
+12. HADOOP-331. Write all map outputs to a single file with an index,
+    rather than to a separate file per reduce task.  This should both
+    speed the shuffle and make things more scalable.
+    (Devaraj Das via cutting)
+
+13. HADOOP-818. Fix contrib unit tests to not depend on core unit
+    tests.  (omalley via cutting)
+
+14. HADOOP-786. Log common exception at debug level.
+    (Sanjay Dahiya via cutting)
+
+15. HADOOP-796. Provide more convenient access to failed task
+    information in the web interface.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-764. Reduce memory allocations in namenode some.
+    (Dhruba Borthakur via cutting) 
+
+17. HADOOP-802. Update description of mapred.speculative.execution to
+    mention reduces.  (Nigel Daley via cutting)
+
+18. HADOOP-806. Include link to datanodes on front page of namenode
+    web interface.  (Raghu Angadi via cutting)
+
+19. HADOOP-618.  Make JobSubmissionProtocol public.
+    (Arun C Murthy via cutting)
+
+20. HADOOP-782.  Fully remove killed tasks.  (Arun C Murthy via cutting)
+
+21. HADOOP-792.  Fix 'dfs -mv' to return correct status.
+    (Dhruba Borthakur via cutting) 
+
+22. HADOOP-673.  Give each task its own working directory again.
+    (Mahadev Konar via cutting)
+
+23. HADOOP-571.  Extend the syntax of Path to be a URI; to be
+    optionally qualified with a scheme and authority.  The scheme
+    determines the FileSystem implementation, while the authority
+    determines the FileSystem instance.  New FileSystem
+    implementations may be provided by defining an fs.<scheme>.impl
+    property, naming the FileSystem implementation class.  This
+    permits easy integration of new FileSystem implementations.
+    (cutting)
+
+24. HADOOP-720.  Add an HDFS white paper to website.
+    (Dhruba Borthakur via cutting) 
+
+25. HADOOP-794.  Fix a divide-by-zero exception when a job specifies
+    zero map tasks.  (omalley via cutting)
+
+26. HADOOP-454.  Add a 'dfs -dus' command that provides summary disk
+    usage.  (Hairong Kuang via cutting)
+
+27. HADOOP-574.  Add an Amazon S3 implementation of FileSystem.  To
+    use this, one need only specify paths of the form
+    s3://id:secret@bucket/.  Alternately, the AWS access key id and
+    secret can be specified in your config, with the properties
+    fs.s3.awsAccessKeyId and fs.s3.awsSecretAccessKey.
+    (Tom White via cutting)
+
+28. HADOOP-824.  Rename DFSShell to be FsShell, since it applies
+    generically to all FileSystem implementations.  (cutting)
+
+29. HADOOP-813.  Fix map output sorting to report progress, so that
+    sorts which take longer than the task timeout do not fail.
+    (Devaraj Das via cutting)
+
+30. HADOOP-825.  Fix HDFS daemons when configured with new URI syntax.
+    (omalley via cutting)
+
+31. HADOOP-596.  Fix a bug in phase reporting during reduce.
+    (Sanjay Dahiya via cutting)
+
+32. HADOOP-811.  Add a utility, MultithreadedMapRunner.
+    (Alejandro Abdelnur via cutting)
+
+33. HADOOP-829.  Within HDFS, clearly separate three different
+    representations for datanodes: one for RPCs, one for
+    namenode-internal use, and one for namespace persistence.
+    (Dhruba Borthakur via cutting) 
+
+34. HADOOP-823.  Fix problem starting datanode when not all configured
+    data directories exist.  (Bryan Pendleton via cutting)
+
+35. HADOOP-451.  Add a Split interface.  CAUTION: This incompatibly
+    changes the InputFormat and RecordReader interfaces.  Not only is
+    FileSplit replaced with Split, but a FileSystem parameter is no
+    longer passed in several methods, input validation has changed,
+    etc.  (omalley via cutting)
+
+36. HADOOP-814.  Optimize locking in namenode. (Dhruba Borthakur via cutting) 
+
+37. HADOOP-738.  Change 'fs -put' and 'fs -get' commands to accept
+    standard input and output, respectively.  Standard i/o is
+    specified by a file named '-'.  (Wendy Chien via cutting)
+
+38. HADOOP-835.  Fix a NullPointerException reading record-compressed
+    SequenceFiles.  (Hairong Kuang via cutting)
+
+39. HADOOP-836.  Fix a MapReduce bug on Windows, where the wrong
+    FileSystem was used.  Also add a static FileSystem.getLocal()
+    method and better Path checking in HDFS, to help avoid such issues
+    in the future.  (omalley via cutting)
+
+40. HADOOP-837.  Improve RunJar utility to unpack the jar file into
+    hadoop.tmp.dir, rather than the system temporary directory.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-841.  Fix native library to build 32-bit version even when
+    on a 64-bit host, if a 32-bit JVM is used.  (Arun C Murthy via cutting)
+
+42. HADOOP-838.  Fix tasktracker to pass java.library.path to
+    sub-processes, so that libhadoop.a is found.
+    (Arun C Murthy via cutting)
+
+43. HADOOP-844.  Send metrics messages on a fixed-delay schedule
+    instead of a fixed-rate schedule.  (David Bowen via cutting)
+
+44. HADOOP-849.  Fix OutOfMemory exceptions in TaskTracker due to a
+    file handle leak in SequenceFile.  (Devaraj Das via cutting)
+
+45. HADOOP-745.  Fix a synchronization bug in the HDFS namenode.
+    (Dhruba Borthakur via cutting)
+
+46. HADOOP-850.  Add Writable implementations for variable-length
+    integers.  (ab via cutting)
+
+47. HADOOP-525.  Add raw comparators to record types.  This greatly
+    improves record sort performance.  (Milind Bhandarkar via cutting)
+
+48. HADOOP-628.  Fix a problem with 'fs -cat' command, where some
+    characters were replaced with question marks.  (Wendy Chien via cutting)
+
+49. HADOOP-804.  Reduce verbosity of MapReduce logging.
+    (Sanjay Dahiya via cutting)
+
+50. HADOOP-853.  Rename 'site' to 'docs', in preparation for inclusion
+    in releases.  (cutting)
+
+51. HADOOP-371.  Include contrib jars and site documentation in
+    distributions.  Also add contrib and example documentation to
+    distributed javadoc, in separate sections.  (Nigel Daley via cutting)
+
+52. HADOOP-846.  Report progress during entire map, as sorting of
+    intermediate outputs may happen at any time, potentially causing
+    task timeouts.  (Devaraj Das via cutting)
+
+53. HADOOP-840.  In task tracker, queue task cleanups and perform them
+    in a separate thread.  (omalley & Mahadev Konar via cutting)
+
+54. HADOOP-681.  Add to HDFS the ability to decommission nodes.  This
+    causes their blocks to be re-replicated on other nodes, so that
+    they may be removed from a cluster.  (Dhruba Borthakur via cutting)
+
+55. HADOOP-470.  In HDFS web ui, list the datanodes containing each
+    copy of a block.  (Hairong Kuang via cutting)
+
+56. HADOOP-700.  Change bin/hadoop to only include core jar file on
+    classpath, not example, test, etc.  Also rename core jar to
+    hadoop-${version}-core.jar so that it can be more easily
+    identified.  (Nigel Daley via cutting)
+
+57. HADOOP-619.  Extend InputFormatBase to accept individual files and
+    glob patterns as MapReduce inputs, not just directories.  Also
+    change contrib/streaming to use this.  (Sanjay Dahiya via cutting)
+
+
+Release 0.9.2 - 2006-12-15
+
+ 1. HADOOP-639. Restructure InterTrackerProtocol to make task
+    accounting more reliable.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-827. Turn off speculative execution by default, since it's
+    currently broken.  (omalley via cutting)
+
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+    (Mahadev Konar via cutting)
+
+
+Release 0.9.1 - 2006-12-06
+
+ 1. HADOOP-780. Use ReflectionUtils to instantiate key and value
+    objects. (ab)
+
+ 2. HADOOP-779. Fix contrib/streaming to work correctly with gzipped
+    input files.  (Hairong Kuang via cutting)
+
+
+Release 0.9.0 - 2006-12-01
+
+ 1. HADOOP-655.  Remove most deprecated code.  A few deprecated things
+    remain, notably UTF8 and some methods that are still required.
+    Also cleaned up constructors for SequenceFile, MapFile, SetFile,
+    and ArrayFile a bit.  (cutting)
+
+ 2. HADOOP-565.  Upgrade to Jetty version 6. (Sanjay Dahiya via cutting)
+
+ 3. HADOOP-682.  Fix DFS format command to work correctly when
+    configured with a non-existent directory. (Sanjay Dahiya via cutting)
+
+ 4. HADOOP-645.  Fix a bug in contrib/streaming when -reducer is NONE.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-687.  Fix a classpath bug in bin/hadoop that blocked the
+    servers from starting. (Sameer Paranjpye via omalley)
+
+ 6. HADOOP-683.  Remove a script dependency on bash, so it works with
+    dash, the new default for /bin/sh on Ubuntu.  (James Todd via cutting)
+
+ 7. HADOOP-382.  Extend unit tests to run multiple datanodes.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-604.  Fix some synchronization issues and a
+    NullPointerException in DFS datanode.  (Raghu Angadi via cutting)
+
+ 9. HADOOP-459.  Fix memory leaks and a host of other issues with
+    libhdfs.  (Sameer Paranjpye via cutting)
+
+10. HADOOP-694.  Fix a NullPointerException in jobtracker.
+    (Mahadev Konar via cutting)
+
+11. HADOOP-637.  Fix a memory leak in the IPC server.  Direct buffers
+    are not collected like normal buffers, and provided little
+    advantage.  (Raghu Angadi via cutting)
+
+12. HADOOP-696.  Fix TestTextInputFormat unit test to not rely on the
+    order of directory listings.  (Sameer Paranjpye via cutting)
+
+13. HADOOP-611.  Add support for iterator-based merging to
+    SequenceFile.  (Devaraj Das via cutting)
+
+14. HADOOP-688.  Move DFS administrative commands to a separate
+    command named 'dfsadmin'.  (Dhruba Borthakur via cutting) 
+
+15. HADOOP-708.  Fix test-libhdfs to return the correct status, so
+    that failures will break the build.  (Nigel Daley via cutting)
+
+16. HADOOP-646.  Fix namenode to handle edits files larger than 2GB.
+    (Milind Bhandarkar via cutting)
+
+17. HADOOP-705.  Fix a bug in the JobTracker when failed jobs were
+    not completely cleaned up.  (Mahadev Konar via cutting)
+
+18. HADOOP-613.  Perform final merge while reducing.  This removes one
+    sort pass over the data and should consequently significantly
+    decrease overall processing time.  (Devaraj Das via cutting)
+
+19. HADOOP-661.  Make each job's configuration visible through the web
+    ui.  (Arun C Murthy via cutting)
+
+20. HADOOP-489.  In MapReduce, separate user logs from system logs.
+    Each task's log output is now available through the web ui.  (Arun
+    C Murthy via cutting)
+
+21. HADOOP-712.  Fix record io's xml serialization to correctly handle
+    control-characters.  (Milind Bhandarkar via cutting)
+
+22. HADOOP-668.  Improvements to the web-based DFS browser.
+    (Hairong Kuang via cutting)
+
+23. HADOOP-715.  Fix build.xml so that test logs are written in build
+    directory, rather than in CWD.  (Arun C Murthy via cutting)
+
+24. HADOOP-538.  Add support for building an optional native library,
+    libhadoop.so, that improves the performance of zlib-based
+    compression.  To build this, specify -Dcompile.native to Ant.
+    (Arun C Murthy via cutting)
+
+25. HADOOP-610.  Fix a problem when the DFS block size is configured
+    to be smaller than the buffer size, typically only when debugging.
+    (Milind Bhandarkar via cutting)
+
+26. HADOOP-695.  Fix a NullPointerException in contrib/streaming.
+    (Hairong Kuang via cutting)
+
+27. HADOOP-652.  In DFS, when a file is deleted, the block count is
+    now decremented.  (Vladimir Krokhmalyov via cutting)
+
+28. HADOOP-725.  In DFS, optimize block placement algorithm,
+    previously a performance bottleneck.  (Milind Bhandarkar via cutting)
+
+29. HADOOP-723.  In MapReduce, fix a race condition during the
+    shuffle, which resulted in FileNotFoundExceptions.  (omalley via cutting)
+
+30. HADOOP-447.  In DFS, fix getBlockSize(Path) to work with relative
+    paths.  (Raghu Angadi via cutting)
+
+31. HADOOP-733.  Make exit codes in DFShell consistent and add a unit
+    test.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-709.  Fix contrib/streaming to work with commands that
+    contain control characters.  (Dhruba Borthakur via cutting)
+
+33. HADOOP-677.  In IPC, permit a version header to be transmitted
+    when connections are established.  This will permit us to change
+    the format of IPC requests back-compatibly in subsequent releases.
+    (omalley via cutting)
+
+34. HADOOP-699.  Fix DFS web interface so that filesystem browsing
+    works correctly, using the right port number.  Also add support
+    for sorting datanode list by various columns.
+    (Raghu Angadi via cutting)
+
+35. HADOOP-76.  Implement speculative reduce.  Now when a job is
+    configured for speculative execution, both maps and reduces will
+    execute speculatively.  Reduce outputs are written to temporary
+    location and moved to the final location when reduce is complete.
+    (Sanjay Dahiya via cutting)
+
+36. HADOOP-736.  Roll back to Jetty 5.1.4, due to performance problems
+    with Jetty 6.0.1.
+
+37. HADOOP-739.  Fix TestIPC to use different port number, making it
+    more reliable.  (Nigel Daley via cutting)
+
+38. HADOOP-749.  Fix a NullPointerException in jobfailures.jsp.
+    (omalley via cutting)
+
+39. HADOOP-747.  Fix record serialization to work correctly when
+    records are embedded in Maps.  (Milind Bhandarkar via cutting)
+
+40. HADOOP-698.  Fix HDFS client not to retry the same datanode on
+    read failures.  (Milind Bhandarkar via cutting)
+
+41. HADOOP-689. Add GenericWritable, to facilitate polymorphism in
+    MapReduce, SequenceFile, etc. (Feng Jiang via cutting)
+
+42. HADOOP-430.  Stop datanode's HTTP server when registration with
+    namenode fails.  (Wendy Chien via cutting)
+
+43. HADOOP-750.  Fix a potential race condition during mapreduce
+    shuffle.  (omalley via cutting)
+
+44. HADOOP-728.  Fix contrib/streaming-related issues, including
+    '-reducer NONE'.  (Sanjay Dahiya via cutting)
+
+
+Release 0.8.0 - 2006-11-03
+
+ 1. HADOOP-477.  Extend contrib/streaming to scan the PATH environment
+    variables when resolving executable program names.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-583.  In DFSClient, reduce the log level of re-connect
+    attempts from 'info' to 'debug', so they are not normally shown.
+    (Konstantin Shvachko via cutting)
+
+ 3. HADOOP-498.  Re-implement DFS integrity checker to run server-side,
+    for much improved performance.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-586.  Use the jar name for otherwise un-named jobs.
+    (Sanjay Dahiya via cutting)
+
+ 5. HADOOP-514.  Make DFS heartbeat interval configurable.
+    (Milind Bhandarkar via cutting)
+
+ 6. HADOOP-588.  Fix logging and accounting of failed tasks.
+    (Sanjay Dahiya via cutting)
+
+ 7. HADOOP-462.  Improve command line parsing in DFSShell, so that
+    incorrect numbers of arguments result in informative errors rather
+    than ArrayIndexOutOfBoundsException.  (Dhruba Borthakur via cutting)
+
+ 8. HADOOP-561.  Fix DFS so that one replica of each block is written
+    locally, if possible.  This was the intent, but there was a bug.
+    (Dhruba Borthakur via cutting) 
+
+ 9. HADOOP-610.  Fix TaskTracker to survive more exceptions, keeping
+    tasks from becoming lost.  (omalley via cutting)
+
+10. HADOOP-625.  Add a servlet to all http daemons that displays a
+    stack dump, useful for debugging.  (omalley via cutting)
+
+11. HADOOP-554.  Fix DFSShell to return -1 for errors.
+    (Dhruba Borthakur via cutting) 
+
+12. HADOOP-626.  Correct the documentation in the NNBench example
+    code, and also remove a mistaken call there.
+    (Nigel Daley via cutting)
+
+13. HADOOP-634.  Add missing license to many files.
+    (Nigel Daley via cutting)
+
+14. HADOOP-627.  Fix some synchronization problems in MiniMRCluster
+    that sometimes caused unit tests to fail.  (Nigel Daley via cutting)
+
+15. HADOOP-563.  Improve the NameNode's lease policy so that leases
+    are held for one hour without renewal (instead of one minute).
+    However another attempt to create the same file will still succeed
+    if the lease has not been renewed within a minute.  This prevents
+    communication or scheduling problems from causing a write to fail
+    for up to an hour, barring some other process trying to create the
+    same file.  (Dhruba Borthakur via cutting)
+
+16. HADOOP-635.  In DFSShell, permit specification of multiple files
+    as the source for file copy and move commands.
+    (Dhruba Borthakur via cutting)
+
+17. HADOOP-641.  Change NameNode to request a fresh block report from
+    a re-discovered DataNode, so that no-longer-needed replications
+    are stopped promptly.  (Konstantin Shvachko via cutting)
+
+18. HADOOP-642.  Change IPC client to specify an explicit connect
+    timeout.  (Konstantin Shvachko via cutting)
+
+19. HADOOP-638.  Fix an unsynchronized access to TaskTracker's
+    internal state.  (Nigel Daley via cutting)
+
+20. HADOOP-624.  Fix servlet path to stop a Jetty warning on startup.
+    (omalley via cutting)
+
+21. HADOOP-578.  Failed tasks are no longer placed at the end of the
+    task queue.  This was originally done to work around other
+    problems that have now been fixed.  Re-executing failed tasks
+    sooner causes buggy jobs to fail faster.  (Sanjay Dahiya via cutting)
+
+22. HADOOP-658.  Update source file headers per Apache policy.  (cutting)
+
+23. HADOOP-636.  Add MapFile & ArrayFile constructors which accept a
+    Progressable, and pass it down to SequenceFile.  This permits
+    reduce tasks which use MapFile to still report progress while
+    writing blocks to the filesystem.  (cutting)
+
+24. HADOOP-576.  Enable contrib/streaming to use the file cache.  Also
+    extend the cache to permit symbolic links to cached items, rather
+    than local file copies.  (Mahadev Konar via cutting)
+
+25. HADOOP-482.  Fix unit tests to work when a cluster is running on
+    the same machine, removing port conflicts.  (Wendy Chien via cutting)
+
+26. HADOOP-90.  Permit dfs.name.dir to list multiple directories,
+    where namenode data is to be replicated. (Milind Bhandarkar via cutting)
+
+27. HADOOP-651.  Fix DFSCk to correctly pass parameters to the servlet
+    on the namenode.  (Milind Bhandarkar via cutting)
+
+28. HADOOP-553.  Change main() routines of DataNode and NameNode to
+    log exceptions rather than letting the JVM print them to standard
+    error.  Also, change the hadoop-daemon.sh script to rotate
+    standard i/o log files.  (Raghu Angadi via cutting)
+
+29. HADOOP-399.  Fix javadoc warnings.  (Nigel Daley via cutting)
+
+30. HADOOP-599.  Fix web ui and command line to correctly report DFS
+    filesystem size statistics.  Also improve web layout.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-660.  Permit specification of junit test output format.
+    (Nigel Daley via cutting)
+
+32. HADOOP-663.  Fix a few unit test issues.  (Mahadev Konar via cutting)
+
+33. HADOOP-664.  Cause entire build to fail if libhdfs tests fail.
+    (Nigel Daley via cutting)
+
+34. HADOOP-633.  Keep jobtracker from dying when job initialization
+    throws exceptions.  Also improve exception handling in a few other
+    places and add more informative thread names.
+    (omalley via cutting)
+
+35. HADOOP-669.  Fix a problem introduced by HADOOP-90 that can cause
+    DFS to lose files.  (Milind Bhandarkar via cutting)
+
+36. HADOOP-373.  Consistently check the value returned by
+    FileSystem.mkdirs().  (Wendy Chien via cutting)
+
+37. HADOOP-670.  Code cleanups in some DFS internals: use generic
+    types, replace Vector with ArrayList, etc.
+    (Konstantin Shvachko via cutting)
+
+38. HADOOP-647.  Permit map outputs to use a different compression
+    type than the job output.  (omalley via cutting)
+
+39. HADOOP-671.  Fix file cache to check for pre-existence before
+    creating it.  (Mahadev Konar via cutting)
+
+40. HADOOP-665.  Extend many DFSShell commands to accept multiple
+    arguments.  Now commands like "ls", "rm", etc. will operate on
+    multiple files.  (Dhruba Borthakur via cutting)
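+
+    For example (the paths here are illustrative, not taken from the
+    release notes), several files can now be handled in one call:
+
+      bin/hadoop dfs -rm /tmp/a.txt /tmp/b.txt /tmp/c.txt
+      bin/hadoop dfs -ls /user/alice /user/bob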
+
+
+Release 0.7.2 - 2006-10-18
+
+ 1. HADOOP-607.  Fix a bug where classes included in job jars were not
+    found by tasks.  (Mahadev Konar via cutting)
+
+ 2. HADOOP-609.  Add a unit test that checks that classes in job jars
+    can be found by tasks.  Also modify unit tests to specify multiple
+    local directories.  (Mahadev Konar via cutting)
+
+
+Release 0.7.1 - 2006-10-11
+
+ 1. HADOOP-593.  Fix a NullPointerException in the JobTracker.
+    (omalley via cutting)
+
+ 2. HADOOP-592.  Fix a NullPointerException in the IPC Server.  Also
+    consistently log when stale calls are discarded.  (omalley via cutting)
+
+ 3. HADOOP-594.  Increase the DFS safe-mode threshold from .95 to
+    .999, so that nearly all blocks must be reported before filesystem
+    modifications are permitted.  (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-598.  Fix tasks to retry when reporting completion, so that
+    a single RPC timeout won't fail a task.  (omalley via cutting)
+
+ 5. HADOOP-597.  Fix TaskTracker to not discard map outputs for errors
+    in transmitting them to reduce nodes.  (omalley via cutting)
+
+
+Release 0.7.0 - 2006-10-06
+
+ 1. HADOOP-243.  Fix rounding in the display of task and job progress
+    so that things are not shown to be 100% complete until they are in
+    fact finished.  (omalley via cutting) 
+
+ 2. HADOOP-438.  Limit the length of absolute paths in DFS, since the
+    file format used to store pathnames has some limitations.
+    (Wendy Chien via cutting)
+
+ 3. HADOOP-530.  Improve error messages in SequenceFile when keys or
+    values are of the wrong type.  (Hairong Kuang via cutting)
+
+ 4. HADOOP-288.  Add a file caching system and use it in MapReduce to
+    cache job jar files on slave nodes.  (Mahadev Konar via cutting)
+
+ 5. HADOOP-533.  Fix unit test to not modify conf directory.
+   (Hairong Kuang via cutting)
+
+ 6. HADOOP-527.  Permit specification of the local address that various
+    Hadoop daemons should bind to.  (Philippe Gassmann via cutting)
+
+ 7. HADOOP-542.  Updates to contrib/streaming: reformatted source code,
+    on-the-fly merge sort, a fix for HADOOP-540, etc.
+    (Michel Tourn via cutting)
+
+ 8. HADOOP-545.  Remove an unused config file parameter.
+    (Philippe Gassmann via cutting)
+
+ 9. HADOOP-548.  Add an Ant property "test.output" to build.xml that
+    causes test output to be logged to the console.  (omalley via cutting)
+
+10. HADOOP-261.  Record an error message when map output is lost.
+    (omalley via cutting)
+
+11. HADOOP-293.  Report the full list of task error messages in the
+    web ui, not just the most recent.  (omalley via cutting)
+
+12. HADOOP-551.  Restore JobClient's console printouts to only include
+    a maximum of one update per one percent of progress.
+    (omalley via cutting)
+
+13. HADOOP-306.  Add a "safe" mode to DFS.  The name node enters this
+    when less than a specified percentage of file data is complete.
+    Currently safe mode is only used on startup, but eventually it
+    will also be entered when datanodes disconnect and file data
+    becomes incomplete.  While in safe mode no filesystem
+    modifications are permitted and block replication is inhibited.
+    (Konstantin Shvachko via cutting)
+
+14. HADOOP-431.  Change 'dfs -rm' to not operate recursively and add a
+    new command, 'dfs -rmr' which operates recursively.
+    (Sameer Paranjpye via cutting)
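+
+    For example (illustrative paths), removing a single file versus an
+    entire directory tree now looks like:
+
+      bin/hadoop dfs -rm /user/alice/part-00000
+      bin/hadoop dfs -rmr /user/alice/old-output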
+
+15. HADOOP-263.  Include timestamps for job transitions.  The web
+    interface now displays the start and end times of tasks and the
+    start times of sorting and reducing for reduce tasks.  Also,
+    extend ObjectWritable to handle enums, so that they can be passed
+    as RPC parameters.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-556.  Contrib/streaming: send keep-alive reports to task
+    tracker every 10 seconds rather than every 100 records, to avoid
+    task timeouts.  (Michel Tourn via cutting)
+
+17. HADOOP-547.  Fix reduce tasks to ping tasktracker while copying
+    data, rather than only between copies, avoiding task timeouts.
+    (Sanjay Dahiya via cutting)
+
+18. HADOOP-537.  Fix src/c++/libhdfs build process to create files in
+    build/, no longer modifying the source tree.
+    (Arun C Murthy via cutting)
+
+19. HADOOP-487.  Throw a more informative exception for unknown RPC
+    hosts.  (Sameer Paranjpye via cutting)
+
+20. HADOOP-559.  Add file name globbing (pattern matching) support to
+    the FileSystem API, and use it in DFSShell ('bin/hadoop dfs')
+    commands.  (Hairong Kuang via cutting)
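+
+    For example (illustrative pattern; quote it so the local shell does
+    not expand it first), one command can now select many files:
+
+      bin/hadoop dfs -ls '/logs/2006-10-*'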
+
+21. HADOOP-508.  Fix a bug in FSDataInputStream.  Incorrect data was
+    returned after seeking to a random location.
+    (Milind Bhandarkar via cutting)
+
+22. HADOOP-560.  Add a "killed" task state.  This can be used to
+    distinguish kills from other failures.  Task state has also been
+    converted to use an enum type instead of an int, uncovering a bug
+    elsewhere.  The web interface is also updated to display killed
+    tasks.  (omalley via cutting)
+
+23. HADOOP-423.  Normalize Paths containing directories named "." and
+    "..", using the standard, unix interpretation.  Also add checks in
+    DFS, prohibiting the use of "." or ".." as directory or file
+    names.  (Wendy Chien via cutting)
+
+24. HADOOP-513.  Replace map output handling with a servlet, rather
+    than a JSP page.  This fixes an issue where
+    IllegalStateExceptions were logged, sets content-length
+    correctly, and better handles some errors.  (omalley via cutting)
+
+25. HADOOP-552.  Improved error checking when copying map output files
+    to reduce nodes.  (omalley via cutting)
+
+26. HADOOP-566.  Fix scripts to work correctly when accessed through
+    relative symbolic links.  (Lee Faris via cutting)
+
+27. HADOOP-519.  Add positioned read methods to FSInputStream.  These
+    permit one to read from a stream without moving its position, and
+    can hence be performed by multiple threads at once on a single
+    stream. Implement an optimized version for DFS and local FS.
+    (Milind Bhandarkar via cutting)
+
+28. HADOOP-522. Permit block compression with MapFile and SetFile.
+    Since these formats are always sorted, block compression can
+    provide a big advantage.  (cutting)
+
+29. HADOOP-567. Record version and revision information in builds.  A
+    package manifest is added to the generated jar file containing
+    version information, and a VersionInfo utility is added that
+    includes further information, including the build date and user,
+    and the subversion revision and repository.  A 'bin/hadoop
+    version' command is added to show this information, and it is also
+    added to various web interfaces.  (omalley via cutting)
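+
+    For example:
+
+      bin/hadoop version
+
+    prints the release version together with the recorded build and
+    subversion details described above.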
+
+30. HADOOP-568.  Fix so that errors while initializing tasks on a
+    tasktracker correctly report the task as failed to the jobtracker,
+    so that it will be rescheduled.  (omalley via cutting)
+
+31. HADOOP-550.  Disable automatic UTF-8 validation in Text.  This
+    permits, e.g., TextInputFormat to again operate on non-UTF-8 data.
+    (Hairong and Mahadev via cutting)
+
+32. HADOOP-343.  Fix mapred copying so that a failed tasktracker
+    doesn't cause other copies to slow.  (Sameer Paranjpye via cutting)
+
+33. HADOOP-239.  Add a persistent job history mechanism, so that basic
+    job statistics are not lost after 24 hours and/or when the
+    jobtracker is restarted.  (Sanjay Dahiya via cutting)
+
+34. HADOOP-506.  Ignore heartbeats from stale task trackers.
+   (Sanjay Dahiya via cutting)
+
+35. HADOOP-255.  Discard stale, queued IPC calls.  Do not process
+    calls whose clients will likely time out before they receive a
+    response.  When the queue is full, new calls are now received and
+    queued, and the oldest calls are discarded, so that, when servers
+    get bogged down, they no longer develop a backlog on the socket.
+    This should improve some DFS namenode failure modes.
+    (omalley via cutting)
+
+36. HADOOP-581.  Fix datanode to not reset itself on communications
+    errors with the namenode.  If a request to the namenode fails, the
+    datanode should retry, not restart.  This reduces the load on the
+    namenode, since restarts cause a resend of the block report.
+    (omalley via cutting)
+
+
+Release 0.6.2 - 2006-09-18
+
+1. HADOOP-532.  Fix a bug reading value-compressed sequence files,
+   where an exception was thrown reporting that the full value had not
+   been read.  (omalley via cutting)
+
+2. HADOOP-534.  Change the default value class in JobConf to be Text
+   instead of the now-deprecated UTF8.  This fixes the Grep example
+   program, which was updated to use Text, but relies on this
+   default.  (Hairong Kuang via cutting)
+
+
+Release 0.6.1 - 2006-09-13
+
+ 1. HADOOP-520.  Fix a bug in libhdfs, where write failures were not
+    correctly returning error codes.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-523.  Fix a NullPointerException when TextInputFormat is
+    explicitly specified.  Also add a test case for this.
+    (omalley via cutting)
+
+ 3. HADOOP-521.  Fix another NullPointerException finding the
+    ClassLoader when using libhdfs.  (omalley via cutting)
+
+ 4. HADOOP-526.  Fix a NullPointerException when attempting to start
+    two datanodes in the same directory.  (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-529.  Fix a NullPointerException when opening
+    value-compressed sequence files generated by pre-0.6.0 Hadoop.
+    (omalley via cutting)
+
+
+Release 0.6.0 - 2006-09-08
+
+ 1. HADOOP-427.  Replace some uses of DatanodeDescriptor in the DFS
+    web UI code with DatanodeInfo, the preferred public class.
+    (Devaraj Das via cutting)
+
+ 2. HADOOP-426.  Fix streaming contrib module to work correctly on
+    Solaris.  This was causing nightly builds to fail.
+    (Michel Tourn via cutting)
+
+ 3. HADOOP-400.  Improvements to task assignment.  Tasks are no longer
+    re-run on nodes where they have failed (unless no other node is
+    available).  Also, tasks are better load-balanced among nodes.
+    (omalley via cutting)
+
+ 4. HADOOP-324.  Fix datanode to not exit when a disk is full, but
+    rather simply to fail writes.  (Wendy Chien via cutting)
+
+ 5. HADOOP-434.  Change smallJobsBenchmark to use standard Hadoop
+    scripts.  (Sanjay Dahiya via cutting)
+
+ 6. HADOOP-453.  Fix a bug in Text.setCapacity().  (siren via cutting)
+
+
+ 7. HADOOP-450.  Change so that input types are determined by the
+    RecordReader rather than specified directly in the JobConf.  This
+    facilitates jobs with a variety of input types.
+
+    WARNING: This contains incompatible API changes!  The RecordReader
+    interface has two new methods that all user-defined InputFormats
+    must now define.  Also, the values returned by TextInputFormat are
+    no longer of class UTF8, but now of class Text.
+
+ 8. HADOOP-436.  Fix an error-handling bug in the web ui.
+    (Devaraj Das via cutting)
+
+ 9. HADOOP-455.  Fix a bug in Text, where DEL was not permitted.
+    (Hairong Kuang via cutting)
+
+10. HADOOP-456.  Change the DFS namenode to keep a persistent record
+    of the set of known datanodes.  This will be used to implement a
+    "safe mode" where filesystem changes are prohibited when a
+    critical percentage of the datanodes are unavailable.
+    (Konstantin Shvachko via cutting)
+
+11. HADOOP-322.  Add a job control utility.  This permits one to
+    specify job interdependencies.  Each job is submitted only after
+    the jobs it depends on have successfully completed.
+    (Runping Qi via cutting)
+
+12. HADOOP-176.  Fix a bug in IntWritable.Comparator.
+    (Dick King via cutting)
+
+13. HADOOP-421.  Replace uses of String in recordio package with Text
+    class, for improved handling of UTF-8 data.
+    (Milind Bhandarkar via cutting)
+
+14. HADOOP-464.  Improved error message when job jar not found.
+    (Michel Tourn via cutting)
+
+15. HADOOP-469.  Fix /bin/bash specifics that have crept into our
+    /bin/sh scripts since HADOOP-352.
+    (Jean-Baptiste Quenot via cutting)
+
+16. HADOOP-468.  Add HADOOP_NICENESS environment variable to set
+    scheduling priority for daemons.  (Vetle Roeim via cutting)
+
+17. HADOOP-473.  Fix TextInputFormat to correctly handle more EOL
+    formats.  Things now work correctly with CR, LF or CRLF.
+    (Dennis Kubes & James White via cutting)
+
+18. HADOOP-461.  Make Java 1.5 an explicit requirement.  (cutting)
+
+19. HADOOP-54.  Add block compression to SequenceFile.  One may now
+    specify that blocks of keys and values are compressed together,
+    improving compression for small keys and values.
+    SequenceFile.Writer's constructor is now deprecated and replaced
+    with a factory method.  (Arun C Murthy via cutting)
+
+20. HADOOP-281.  Prohibit DFS files that are also directories.
+    (Wendy Chien via cutting)
+
+21. HADOOP-486.  Add the job username to JobStatus instances returned
+    by JobClient.  (Mahadev Konar via cutting)
+
+22. HADOOP-437.  contrib/streaming: Add support for gzipped inputs.
+    (Michel Tourn via cutting)
+
+23. HADOOP-463.  Add variable expansion to config files.
+    Configuration property values may now contain variable
+    expressions.  A variable is referenced with the syntax
+    '${variable}'.  Variable values are found first in the
+    configuration, and then in Java system properties.  The default
+    configuration is modified so that temporary directories are now
+    under ${hadoop.tmp.dir}, which is, by default,
+    /tmp/hadoop-${user.name}.  (Michel Tourn via cutting)
+
+24. HADOOP-419. Fix a NullPointerException finding the ClassLoader
+    when using libhdfs.  (omalley via cutting)
+
+25. HADOOP-460. Fix contrib/smallJobsBenchmark to use Text instead of
+    UTF8.  (Sanjay Dahiya via cutting)
+
+26. HADOOP-196.  Fix Configuration(Configuration) constructor to work
+    correctly.  (Sami Siren via cutting)
+
+27. HADOOP-501.  Fix Configuration.toString() to handle URL resources.
+    (Thomas Friol via cutting)
+
+28. HADOOP-499.  Reduce the use of Strings in contrib/streaming,
+    replacing them with Text for better performance.
+    (Hairong Kuang via cutting)
+
+29. HADOOP-64.  Manage multiple volumes with a single DataNode.
+    Previously DataNode would create a separate daemon per configured
+    volume, each with its own connection to the NameNode.  Now all
+    volumes are handled by a single DataNode daemon, reducing the load
+    on the NameNode.  (Milind Bhandarkar via cutting)
+
+30. HADOOP-424.  Fix MapReduce so that jobs which generate zero splits
+    do not fail.  (Frédéric Bertin via cutting)
+
+31. HADOOP-408.  Adjust some timeouts and remove some others so that
+    unit tests run faster.  (cutting)
+
+32. HADOOP-507.  Fix an IllegalAccessException in DFS.
+    (omalley via cutting)
+
+33. HADOOP-320.  Fix so that checksum files are correctly copied when
+    the destination of a file copy is a directory.
+    (Hairong Kuang via cutting)
+
+34. HADOOP-286.  In DFSClient, avoid pinging the NameNode with
+    renewLease() calls when no files are being written.
+    (Konstantin Shvachko via cutting)
+
+35. HADOOP-312.  Close idle IPC connections.  All IPC connections were
+    cached forever.  Now, after a connection has been idle for more
+    than a configurable amount of time (one second by default), the
+    connection is closed, conserving resources on both client and
+    server. (Devaraj Das via cutting)
+
+36. HADOOP-497.  Permit the specification of the network interface and
+    nameserver to be used when determining the local hostname
+    advertised by datanodes and tasktrackers.
+    (Lorenzo Thione via cutting)
+
+37. HADOOP-441.  Add a compression codec API and extend SequenceFile
+    to use it.  This will permit the use of alternate compression
+    codecs in SequenceFile.  (Arun C Murthy via cutting)
+
+38. HADOOP-483. Improvements to libhdfs build and documentation.
+    (Arun C Murthy via cutting)
+
+39. HADOOP-458.  Fix a memory corruption bug in libhdfs.
+    (Arun C Murthy via cutting)
+
+40. HADOOP-517.  Fix a contrib/streaming bug in end-of-line detection.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-474.  Add CompressionCodecFactory, and use it in
+    TextInputFormat and TextOutputFormat.  Compressed input files are
+    automatically decompressed when they have the correct extension.
+    Output files will, when output compression is specified, be
+    generated with an appropriate extension.  Also add a gzip codec and
+    fix problems with UTF8 text inputs.  (omalley via cutting)
+
+
+Release 0.5.0 - 2006-08-04
+
+ 1. HADOOP-352.  Fix shell scripts to use /bin/sh instead of
+    /bin/bash, for better portability.
+    (Jean-Baptiste Quenot via cutting)
+
+ 2. HADOOP-313.  Permit task state to be saved so that single tasks
+    may be manually re-executed when debugging.  (omalley via cutting)
+
+ 3. HADOOP-339.  Add method to JobClient API listing jobs that are
+    not yet complete, i.e., that are queued or running.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-355.  Updates to the streaming contrib module, including
+    API fixes, making reduce optional, and adding an input type for
+    StreamSequenceRecordReader.  (Michel Tourn via cutting)
+
+ 5. HADOOP-358.  Fix a NPE bug in Path.equals().
+    (Frédéric Bertin via cutting)
+
+ 6. HADOOP-327.  Fix ToolBase to not call System.exit() when
+    exceptions are thrown.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-359.  Permit map output to be compressed.
+    (omalley via cutting)
+
+ 8. HADOOP-341.  Permit input URI to CopyFiles to use the HTTP
+    protocol.  This lets one, e.g., more easily copy log files into
+    DFS.  (Arun C Murthy via cutting)
+
+ 9. HADOOP-361.  Remove unix dependencies from streaming contrib
+    module tests, making them pure java. (Michel Tourn via cutting)
+
+10. HADOOP-354.  Make public methods to stop DFS daemons.
+    (Barry Kaplan via cutting)
+
+11. HADOOP-252.  Add versioning to RPC protocols.
+    (Milind Bhandarkar via cutting)
+
+12. HADOOP-356.  Add contrib to "compile" and "test" build targets, so
+    that this code is better maintained. (Michel Tourn via cutting)
+
+13. HADOOP-307.  Add smallJobsBenchmark contrib module.  This runs
+    lots of small jobs, in order to determine per-task overheads.
+    (Sanjay Dahiya via cutting)
+
+14. HADOOP-342.  Add a tool for log analysis: Logalyzer.
+    (Arun C Murthy via cutting)
+
+15. HADOOP-347.  Add web-based browsing of DFS content.  The namenode
+    redirects browsing requests to datanodes.  Content requests are
+    redirected to datanodes where the data is local when possible.
+    (Devaraj Das via cutting)
+
+16. HADOOP-351.  Make Hadoop IPC kernel independent of Jetty.
+    (Devaraj Das via cutting)
+
+17. HADOOP-237.  Add metric reporting to DFS and MapReduce.  With only
+    minor configuration changes, one can now monitor many Hadoop
+    system statistics using Ganglia or other monitoring systems.
+    (Milind Bhandarkar via cutting)
+
+18. HADOOP-376.  Fix datanode's HTTP server to scan for a free port.
+    (omalley via cutting)
+
+19. HADOOP-260.  Add --config option to shell scripts, specifying an
+    alternate configuration directory. (Milind Bhandarkar via cutting)
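+
+    For example (the directory name is illustrative), any command can
+    be pointed at an alternate configuration directory:
+
+      bin/hadoop --config /etc/hadoop-staging dfs -ls /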
+
+20. HADOOP-381.  Permit developers to save the temporary files for
+    tasks whose names match a regular expression, to facilitate
+    debugging.  (omalley via cutting)
+
+21. HADOOP-344.  Fix some Windows-related problems with DF.
+    (Konstantin Shvachko via cutting)
+
+22. HADOOP-380.  Fix reduce tasks to poll less frequently for map
+    outputs. (Mahadev Konar via cutting)
+
+23. HADOOP-321.  Refactor DatanodeInfo, in preparation for
+    HADOOP-306.  (Konstantin Shvachko & omalley via cutting)
+
+24. HADOOP-385.  Fix some bugs in record io code generation.
+    (Milind Bhandarkar via cutting)
+
+25. HADOOP-302.  Add new Text class to replace UTF8, removing
+    limitations of that class.  Also refactor utility methods for
+    writing zero-compressed integers (VInts and VLongs).
+    (Hairong Kuang via cutting)
+
+26. HADOOP-335.  Refactor DFS namespace/transaction logging in
+    namenode.   (Konstantin Shvachko via cutting)
+
+27. HADOOP-375.  Fix handling of the datanode HTTP daemon's port so
+    that multiple datanodes can be run on a single host.
+    (Devaraj Das via cutting)
+
+28. HADOOP-386.  When removing excess DFS block replicas, remove those
+    on nodes with the least free space first.
+    (Johan Oskarson via cutting)
+
+29. HADOOP-389.  Fix intermittent failures of mapreduce unit tests.
+    Also fix some build dependencies.
+    (Mahadev & Konstantin via cutting)
+
+30. HADOOP-362.  Fix a problem where jobs hang when status messages
+    are received out-of-order.  (omalley via cutting)
+
+31. HADOOP-394.  Change order of DFS shutdown in unit tests to
+    minimize errors logged.  (Konstantin Shvachko via cutting)
+
+32. HADOOP-396.  Make DatanodeID implement Writable.
+    (Konstantin Shvachko via cutting)
+
+33. HADOOP-377.  Permit one to add URL resources to a Configuration.
+    (Jean-Baptiste Quenot via cutting)
+
+34. HADOOP-345.  Permit iteration over Configuration key/value pairs.
+    (Michel Tourn via cutting)
+
+35. HADOOP-409.  Streaming contrib module: make configuration
+    properties available to commands as environment variables.
+    (Michel Tourn via cutting)
+
+36. HADOOP-369.  Add -getmerge option to dfs command that appends all
+    files in a directory into a single local file.
+    (Johan Oskarson via cutting)
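+
+    For example (illustrative paths), the files under an output
+    directory can be concatenated into a single local file:
+
+      bin/hadoop dfs -getmerge /user/alice/output merged.txt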
+
+37. HADOOP-410.  Replace some TreeMaps with HashMaps in DFS, for
+    a 17% performance improvement. (Milind Bhandarkar via cutting)
+
+38. HADOOP-411.  Add unit tests for command line parser.
+    (Hairong Kuang via cutting)
+
+39. HADOOP-412.  Add MapReduce input formats that support filtering
+    of SequenceFile data, including sampling and regex matching.
+    Also, move JobConf.newInstance() to a new utility class.
+    (Hairong Kuang via cutting)
+
+40. HADOOP-226.  Fix fsck command to properly consider replication
+    counts, now that these can vary per file.  (Bryan Pendleton via cutting)
+
+41. HADOOP-425.  Add a Python MapReduce example, using Jython.
+    (omalley via cutting)
+
+
+Release 0.4.0 - 2006-06-28
+
+ 1. HADOOP-298.  Improved progress reports for CopyFiles utility, the
+    distributed file copier.  (omalley via cutting)
+
+ 2. HADOOP-299.  Fix the task tracker, permitting multiple jobs to
+    more easily execute at the same time.  (omalley via cutting)
+
+ 3. HADOOP-250.  Add an HTTP user interface to the namenode, running
+    on port 50070. (Devaraj Das via cutting)
+
+ 4. HADOOP-123.  Add MapReduce unit tests that run a jobtracker and
+    tasktracker, greatly increasing code coverage.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-271.  Add links from jobtracker's web ui to tasktracker's
+    web ui.  Also attempt to log a thread dump of child processes
+    before they're killed.  (omalley via cutting)
+
+ 6. HADOOP-210.  Change RPC server to use a selector instead of a
+    thread per connection.  This should make it easier to scale to
+    larger clusters.  Note that this incompatibly changes the RPC
+    protocol: clients and servers must both be upgraded to the new
+    version to ensure correct operation.  (Devaraj Das via cutting)
+
+ 7. HADOOP-311.  Change DFS client to retry failed reads, so that a
+    single read failure will not alone cause failure of a task.
+    (omalley via cutting)
+
+ 8. HADOOP-314.  Remove the "append" phase when reducing.  Map output
+    files are now directly passed to the sorter, without first
+    appending them into a single file.  Now, the first third of reduce
+    progress is "copy" (transferring map output to reduce nodes), the
+    middle third is "sort" (sorting map output) and the last third is
+    "reduce" (generating output).  Long-term, the "sort" phase will
+    also be removed.  (omalley via cutting)
+
+ 9. HADOOP-316.  Fix a potential deadlock in the jobtracker.
+    (omalley via cutting)
+
+10. HADOOP-319.  Fix FileSystem.close() to remove the FileSystem
+    instance from the cache.  (Hairong Kuang via cutting)
+
+11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
+    locks in a consistent order.  (omalley via cutting)
+
+12. HADOOP-278.  Check for existence of input directories before
+    starting MapReduce jobs, making it easier to debug this common
+    error.  (omalley via cutting)
+
+13. HADOOP-304.  Improve error message for
+    UnregisterdDatanodeException to include expected node name.
+   (Konstantin Shvachko via cutting)
+
+14. HADOOP-305.  Fix TaskTracker to ask for new tasks as soon as a
+    task is finished, rather than waiting for the next heartbeat.
+    This improves performance when tasks are short.
+    (Mahadev Konar via cutting)
+
+15. HADOOP-59.  Add support for generic command line options.  One may
+    now specify the filesystem (-fs), the MapReduce jobtracker (-jt),
+    a config file (-conf) or any configuration property (-D).  The
+    "dfs", "fsck", "job", and "distcp" commands currently support
+    this, with more to be added.  (Hairong Kuang via cutting)
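+
+    A sketch of the syntax (host names, ports, and property values are
+    illustrative; exact forms may differ between releases):
+
+      bin/hadoop dfs -fs namenode.example.com:8020 -ls /
+      bin/hadoop dfs -D dfs.replication=2 -ls /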
+
+16. HADOOP-296.  Permit specification of the amount of reserved space
+    on a DFS datanode.  One may specify both the percentage free and
+    the number of bytes.  (Johan Oskarson via cutting)
+
+17. HADOOP-325.  Fix a problem initializing RPC parameter classes, and
+    remove the workaround used to initialize classes.
+    (omalley via cutting)
+
+18. HADOOP-328.  Add an option to the "distcp" command to ignore read
+    errors while copying.  (omalley via cutting)
+
+19. HADOOP-27.  Don't allocate tasks to trackers whose local free
+    space is too low.  (Johan Oskarson via cutting)
+
+20. HADOOP-318.  Keep slow DFS output from causing task timeouts.
+    This incompatibly changes some public interfaces, adding a
+    parameter to OutputFormat.getRecordWriter() and the new method
+    Reporter.progress(), but it makes lots of tasks succeed that were
+    previously failing.  (Milind Bhandarkar via cutting)
+
+
+Release 0.3.2 - 2006-06-09
+
+ 1. HADOOP-275.  Update the streaming contrib module to use log4j for
+    its logging.  (Michel Tourn via cutting)
+
+ 2. HADOOP-279.  Provide defaults for log4j logging parameters, so
+    that things still work reasonably when Hadoop-specific system
+    properties are not provided.  (omalley via cutting)
+
+ 3. HADOOP-280.  Fix a typo in AllTestDriver which caused the wrong
+    test to be run when "DistributedFSCheck" was specified.
+   (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-240.  DFS's mkdirs() implementation no longer logs a warning
+    when the directory already exists. (Hairong Kuang via cutting)
+
+ 5. HADOOP-285.  Fix DFS datanodes to be able to re-join the cluster
+    after the connection to the namenode is lost.  (omalley via cutting)
+
+ 6. HADOOP-277.  Fix a race condition when creating directories.
+   (Sameer Paranjpye via cutting)
+
+ 7. HADOOP-289.  Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
+ 8. HADOOP-292.  Fix client-side logging to go to standard error
+    rather than standard output, so that it can be distinguished from
+    application output.  (omalley via cutting)
+
+ 9. HADOOP-294.  Fixed bug where conditions for retrying after errors
+    in the DFS client were reversed.  (omalley via cutting)
+
+
+Release 0.3.1 - 2006-06-05
+
+ 1. HADOOP-272.  Fix a bug in bin/hadoop setting log
+    parameters. (omalley & cutting)
+
+ 2. HADOOP-274.  Change applications to log to standard output rather
+    than to a rolling log file like daemons.  (omalley via cutting)
+
+ 3. HADOOP-262.  Fix reduce tasks to report progress while they're
+    waiting for map outputs, so that they do not time out.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-245 and HADOOP-246.  Improvements to record io package.  
+    (Mahadev Konar via cutting)
+
+ 5. HADOOP-276.  Add logging config files to jar file so that they're
+    always found.  (omalley via cutting)
+
+
+Release 0.3.0 - 2006-06-02
+
+ 1. HADOOP-208.  Enhance MapReduce web interface, adding new pages
+    for failed tasks, and tasktrackers.  (omalley via cutting)
+
+ 2. HADOOP-204.  Tweaks to metrics package.  (David Bowen via cutting)
+
+ 3. HADOOP-209.  Add a MapReduce-based file copier.  This will
+    copy files within or between file systems in parallel.
+    (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-146.  Fix DFS to check when randomly generating a new block
+    id that no existing blocks already have that id.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-180. Make a daemon thread that does the actual task clean-ups, so
+    that the main offerService thread in the taskTracker doesn't get stuck
+    and miss its heartbeat window. This was killing many task trackers as
+    big jobs finished (300+ tasks / node). (omalley via cutting)
+
+ 6. HADOOP-200. Avoid transmitting entire list of map task names to
+    reduce tasks.  Instead just transmit the number of map tasks and
+    henceforth refer to them by number when collecting map output.
+    (omalley via cutting)
+
+ 7. HADOOP-219. Fix a NullPointerException when handling a checksum
+    exception under SequenceFile.Sorter.sort().  (cutting & stack)
+
+ 8. HADOOP-212. Permit alteration of the file block size in DFS.  The
+    default block size for new files may now be specified in the
+    configuration with the dfs.block.size property.  The block size
+    may also be specified when files are opened.
+    (omalley via cutting)
+
+ 9. HADOOP-218. Avoid accessing configuration while looping through
+    tasks in JobTracker.  (Mahadev Konar via cutting)
+
+10. HADOOP-161. Add hashCode() method to DFS's Block.
+    (Milind Bhandarkar via cutting)
+
+11. HADOOP-115. Map output types may now be specified.  These are also
+    used as reduce input types, thus permitting reduce input types to
+    differ from reduce output types.  (Runping Qi via cutting)
+
+12. HADOOP-216. Add task progress to task status page.
+    (Bryan Pendleton via cutting)
+
+13. HADOOP-233.  Add web server to task tracker that shows running
+    tasks and logs.  Also add log access to job tracker web interface.
+    (omalley via cutting)
+
+14. HADOOP-205.  Incorporate pending tasks into tasktracker load
+    calculations.  (Mahadev Konar via cutting)
+
+15. HADOOP-247.  Fix sort progress to better handle exceptions.
+    (Mahadev Konar via cutting)
+
+16. HADOOP-195.  Improve performance of the transfer of map outputs to
+    reduce nodes by performing multiple transfers in parallel, each on
+    a separate socket.  (Sameer Paranjpye via cutting)
+
+17. HADOOP-251.  Fix task processes to be tolerant of failed progress
+    reports to their parent process.  (omalley via cutting)
+
+18. HADOOP-325.  Improve the FileNotFound exceptions thrown by
+    LocalFileSystem to include the name of the file.
+    (Benjamin Reed via cutting)
+
+19. HADOOP-254.  Use HTTP to transfer map output data to reduce
+    nodes.  This, together with HADOOP-195, greatly improves the
+    performance of these transfers.  (omalley via cutting)
+
+20. HADOOP-163.  Cause datanodes that are unable to either read or
+    write data to exit, so that the namenode will no longer target
+    them for new blocks and will replicate their data on other nodes.
+    (Hairong Kuang via cutting)
+
+21. HADOOP-222.  Add a -setrep option to the dfs commands that alters
+    file replication levels.  (Johan Oskarson via cutting)
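+
+    For example (path and replication count are illustrative, and the
+    argument order shown follows later releases):
+
+      bin/hadoop dfs -setrep 5 /user/alice/important.dat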
+
+22. HADOOP-75.  In DFS, only check for a complete file when the file
+    is closed, rather than as each block is written.
+    (Milind Bhandarkar via cutting)
+
+23. HADOOP-124. Change DFS so that datanodes are identified by a
+    persistent ID rather than by host and port.  This solves a number
+    of filesystem integrity problems, when, e.g., datanodes are
+    restarted.  (Konstantin Shvachko via cutting)
+
+24. HADOOP-256.  Add a C API for DFS.  (Arun C Murthy via cutting)
+
+25. HADOOP-211.  Switch to use the Jakarta Commons logging internally,
+    configured to use log4j by default.  (Arun C Murthy and cutting)
+
+26. HADOOP-265.  Tasktracker now fails to start if it does not have a
+    writable local directory for temporary files.  In this case, it
+    logs a message to the JobTracker and exits. (Hairong Kuang via cutting)
+
+27. HADOOP-270.  Fix potential deadlock in datanode shutdown.
+    (Hairong Kuang via cutting)
+
+Release 0.2.1 - 2006-05-12
+
+ 1. HADOOP-199.  Fix reduce progress (broken by HADOOP-182).
+    (omalley via cutting)
+
+ 2. HADOOP-201.  Fix 'bin/hadoop dfs -report'.  (cutting)
+
+ 3. HADOOP-207.  Fix JDK 1.4 incompatibility introduced by HADOOP-96.
+    System.getenv() does not work in JDK 1.4.  (Hairong Kuang via cutting)
+
+
+Release 0.2.0 - 2006-05-05
+
+ 1. Fix HADOOP-126. 'bin/hadoop dfs -cp' now correctly copies .crc
+    files.  (Konstantin Shvachko via cutting)
+
+ 2. Fix HADOOP-51. Change DFS to support per-file replication counts.
+    (Konstantin Shvachko via cutting)
+
+ 3. Fix HADOOP-131.  Add scripts to start/stop dfs and mapred daemons.
+    Use these in start/stop-all scripts.  (Chris Mattmann via cutting)
+
+ 4. Stop using ssh options by default that are not yet in widely used
+    versions of ssh.  Folks can still enable their use by uncommenting
+    a line in conf/hadoop-env.sh. (cutting)
+
+ 5. Fix HADOOP-92.  Show information about all attempts to run each
+    task in the web ui.  (Mahadev Konar via cutting)
+
+ 6. Fix HADOOP-128.  Improved DFS error handling. (Owen O'Malley via cutting)
+
+ 7. Fix HADOOP-129.  Replace uses of java.io.File with new class named
+    Path.  This fixes bugs where java.io.File methods were called
+    directly when FileSystem methods were desired, and reduces the
+    likelihood of such bugs in the future.  It also makes the handling
+    of pathnames more consistent between local and dfs FileSystems and
+    between Windows and Unix. java.io.File-based methods are still
+    available for back-compatibility, but are deprecated and will be
+    removed once 0.2 is released. (cutting)
+
+ 8. Change dfs.data.dir and mapred.local.dir to be comma-separated
+    lists of directories, no longer be space-separated. This fixes
+    several bugs on Windows. (cutting)
+
+ 9. Fix HADOOP-144.  Use mapred task id for dfs client id, to
+    facilitate debugging.  (omalley via cutting)
+
+10. Fix HADOOP-143.  Do not line-wrap stack-traces in web ui.
+    (omalley via cutting)
+
+11. Fix HADOOP-118.  In DFS, improve clean up of abandoned file
+    creations.  (omalley via cutting)
+
+12. Fix HADOOP-138.  Stop multiple tasks in a single heartbeat, rather
+    than one per heartbeat.  (Stefan via cutting)
+
+13. Fix HADOOP-139.  Remove a potential deadlock in
+    LocalFileSystem.lock().  (Igor Bolotin via cutting)
+
+14. Fix HADOOP-134.  Don't hang jobs when the tasktracker is
+    misconfigured to use an un-writable local directory.  (omalley via cutting)
+
+15. Fix HADOOP-115.  Correct an error message.  (Stack via cutting)
+
+16. Fix HADOOP-133.  Retry pings from child to parent, in case of
+    (local) communication problems.  Also log exit status, so that one
+    can distinguish patricide from other deaths.  (omalley via cutting)
+
+17. Fix HADOOP-142.  Avoid re-running a task on a host where it has
+    previously failed.  (omalley via cutting)
+
+18. Fix HADOOP-148.  Maintain a task failure count for each
+    tasktracker and display it in the web ui.  (omalley via cutting)
+
+19. Fix HADOOP-151.  Close a potential socket leak, where new IPC
+    connection pools were created per configuration instance that RPCs
+    use.  Now a global RPC connection pool is used again, as
+    originally intended.  (cutting)
+
+20. Fix HADOOP-69.  Don't throw a NullPointerException when getting
+    hints for non-existing file split.  (Bryan Pendleton via cutting)
+
+21. Fix HADOOP-157.  When a task that writes dfs files (e.g., a reduce
+    task) failed and was retried, it would fail again and again,
+    eventually failing the job.  The problem was that dfs did not yet
+    know that the failed task had abandoned the files, and would not
+    yet let another task create files with the same names.  Dfs now
+    retries when creating a file long enough for locks on abandoned
+    files to expire.  (omalley via cutting)
+
+22. Fix HADOOP-150.  Improved task names that include job
+    names. (omalley via cutting)
+
+23. Fix HADOOP-162.  Fix ConcurrentModificationException when
+    releasing file locks. (omalley via cutting)
+
+24. Fix HADOOP-132.  Initial check-in of new Metrics API, including 
+    implementations for writing metric data to a file and for sending
+    it to Ganglia.  (David Bowen via cutting)
+
+25. Fix HADOOP-160.  Remove some unneeded synchronization around
+    time-consuming operations in the TaskTracker.  (omalley via cutting)
+
+26. Fix HADOOP-166.  RPCs failed when passed subclasses of a declared
+    parameter type.  This is fixed by changing ObjectWritable to store
+    both the declared type and the instance type for Writables.  Note
+    that this incompatibly changes the format of ObjectWritable and
+    will render unreadable any ObjectWritables stored in files.
+    Nutch only uses ObjectWritable in intermediate files, so this
+    should not be a problem for Nutch.  (Stefan & cutting)
+
+27. Fix HADOOP-168.  MapReduce RPC protocol methods should all declare
+    IOException, so that timeouts are handled appropriately.
+    (omalley via cutting)
+
+28. Fix HADOOP-169.  Don't fail a reduce task if a call to the
+    jobtracker to locate map outputs fails.  (omalley via cutting)
+
+29. Fix HADOOP-170.  Permit FileSystem clients to examine and modify
+    the replication count of individual files.  Also fix a few
+    replication-related bugs. (Konstantin Shvachko via cutting)
+
+30. Permit specification of a higher replication levels for job
+    submission files (job.xml and job.jar).  This helps with large
+    clusters, since these files are read by every node.  (cutting)
+
+31. HADOOP-173.  Optimize allocation of tasks with local data.  (cutting)
+
+32. HADOOP-167.  Reduce number of Configurations and JobConf's
+    created.  (omalley via cutting)
+
+33. NUTCH-256.  Change FileSystem#createNewFile() to create a .crc
+    file.  The lack of a .crc file was causing warnings.  (cutting)
+
+34. HADOOP-174.  Change JobClient to not abort job until it has failed
+    to contact the job tracker for five attempts, not just one as
+    before.  (omalley via cutting)
+
+35. HADOOP-177.  Change MapReduce web interface to page through tasks.
+    Previously, when jobs had more than a few thousand tasks they
+    could crash web browsers.  (Mahadev Konar via cutting)
+
+36. HADOOP-178.  In DFS, piggyback blockwork requests from datanodes
+    on heartbeat responses from namenode.  This reduces the volume of
+    RPC traffic.  Also move startup delay in blockwork from datanode
+    to namenode.  This fixes a problem where restarting the namenode
+    triggered a lot of unneeded replication. (Hairong Kuang via cutting)
+
+37. HADOOP-183.  If the DFS namenode is restarted with different
+    minimum and/or maximum replication counts, existing files'
+    replication counts are now automatically adjusted to be within the
+    newly configured bounds. (Hairong Kuang via cutting)
+
+38. HADOOP-186.  Better error handling in TaskTracker's top-level
+    loop.  Also improve calculation of time to send next heartbeat.
+    (omalley via cutting)
+
+39. HADOOP-187.  Add two MapReduce examples/benchmarks.  One creates
+    files containing random data.  The second sorts the output of the
+    first.  (omalley via cutting)
+
+40. HADOOP-185.  Fix so that, when a task tracker times out making the
+    RPC asking for a new task to run, the job tracker does not think
+    that it is actually running the task returned.  (omalley via cutting)
+
+41. HADOOP-190.  If a child process hangs after it has reported
+    completion, its output should not be lost.  (Stack via cutting)
+
+42. HADOOP-184. Re-structure some test code to better support testing
+    on a cluster.  (Mahadev Konar via cutting)
+
+43. HADOOP-191.  Add streaming package, Hadoop's first contrib module.
+    This permits folks to easily submit MapReduce jobs whose map and
+    reduce functions are implemented by shell commands.  Use
+    'bin/hadoop jar build/hadoop-streaming.jar' to get details.
+    (Michel Tourn via cutting)
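+
+    A minimal invocation sketch (paths and commands are illustrative;
+    the jar's own usage message is authoritative for the options):
+
+      bin/hadoop jar build/hadoop-streaming.jar \
+        -input /user/alice/input -output /user/alice/output \
+        -mapper /bin/cat -reducer /usr/bin/wc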
+
+44. HADOOP-189.  Fix MapReduce in standalone configuration to
+    correctly handle job jar files that contain a lib directory with
+    nested jar files.  (cutting)
+
+45. HADOOP-65.  Initial version of record I/O framework that enables
+    the specification of record types and generates marshalling code
+    in both Java and C++.  Generated Java code implements
+    WritableComparable, but is not yet otherwise used by
+    Hadoop. (Milind Bhandarkar via cutting)
+
+46. HADOOP-193.  Add a MapReduce-based FileSystem benchmark.
+    (Konstantin Shvachko via cutting)
+
+47. HADOOP-194.  Add a MapReduce-based FileSystem checker.  This reads
+    every block in every file in the filesystem.  (Konstantin Shvachko
+    via cutting)
+
+48. HADOOP-182.  Fix so that lost task trackers do not change the
+    status of reduce tasks or completed jobs.  Also fixes the progress
+    meter so that failed tasks are subtracted. (omalley via cutting)
+
+49. HADOOP-96.  Logging improvements.  Log files are now separate from
+    standard output and standard error files.  Logs are now rolled.
+    Logging of all DFS state changes can be enabled, to facilitate
+    debugging.  (Hairong Kuang via cutting)
+
+
+Release 0.1.1 - 2006-04-08
+
+ 1. Added CHANGES.txt, logging all significant changes to Hadoop.  (cutting)
+
+ 2. Fix MapReduceBase.close() to throw IOException, as declared in the
+    Closeable interface.  This permits subclasses which override this
+    method to throw that exception. (cutting)
+
+ 3. Fix HADOOP-117.  Pathnames were mistakenly transposed in
+    JobConf.getLocalFile() causing many mapred temporary files to not
+    be removed.  (Raghavendra Prabhu via cutting)
+ 
+ 4. Fix HADOOP-116. Clean up job submission files when jobs complete.
+    (cutting)
+
+ 5. Fix HADOOP-125. Fix handling of absolute paths on Windows (cutting)
+
+Release 0.1.0 - 2006-04-01
+
+ 1. The first release of Hadoop.
+

+ 244 - 0
common/LICENSE.txt

@@ -0,0 +1,244 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+APACHE HADOOP SUBCOMPONENTS:
+
+The Apache Hadoop project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses. 
+
+For the org.apache.hadoop.util.bloom.* classes:
+
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract
+ * 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */

+ 2 - 0
common/NOTICE.txt

@@ -0,0 +1,2 @@
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).

+ 31 - 0
common/README.txt

@@ -0,0 +1,31 @@
+For the latest information about Hadoop, please visit our website at:
+
+   http://hadoop.apache.org/core/
+
+and our wiki, at:
+
+   http://wiki.apache.org/hadoop/
+
+This distribution includes cryptographic software.  The country in 
+which you currently reside may have restrictions on the import, 
+possession, use, and/or re-export to another country, of 
+encryption software.  BEFORE using any encryption software, please 
+check your country's laws, regulations and policies concerning the
+import, possession, or use, and re-export of encryption software, to 
+see if this is permitted.  See <http://www.wassenaar.org/> for more
+information.
+
+The U.S. Government Department of Commerce, Bureau of Industry and
+Security (BIS), has classified this software as Export Commodity 
+Control Number (ECCN) 5D002.C.1, which includes information security
+software using or performing cryptographic functions with asymmetric
+algorithms.  The form and manner of this Apache Software Foundation
+distribution makes it eligible for export under the License Exception
+ENC Technology Software Unrestricted (TSU) exception (see the BIS 
+Export Administration Regulations, Section 740.13) for both object 
+code and source code.
+
+The following provides more details on the included cryptographic
+software:
+  Hadoop Core uses the SSL libraries from the Jetty project written 
+by mortbay.org.

+ 123 - 0
common/bin/hadoop

@@ -0,0 +1,123 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs the hadoop core commands. 
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+ 
+. "$bin"/hadoop-config.sh
+
+function print_usage(){
+  echo "Usage: hadoop [--config confdir] COMMAND"
+  echo "       where COMMAND is one of:"
+  echo "  fs                   run a generic filesystem user client"
+  echo "  version              print the version"
+  echo "  jar <jar>            run a jar file"
+  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
+  echo "  classpath            prints the class path needed to get the"
+  echo "                       Hadoop jar and the required libraries"
+  echo "  daemonlog            get/set the log level for each daemon"
+  echo " or"
+  echo "  CLASSNAME            run the class named CLASSNAME"
+  echo ""
+  echo "Most commands print help when invoked w/o parameters."
+}
+
+if [ $# = 0 ]; then
+  print_usage
+  exit
+fi
+
+COMMAND=$1
+case $COMMAND in
+  #hdfs commands
+  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer)
+    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated."
+    echo "Instead use the hdfs command for it."
+    echo ""
+    #try to locate hdfs and if present, delegate to it.  
+    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
+      exec "${HADOOP_HDFS_HOME}"/bin/hdfs $*
+    elif [ -f "${HADOOP_HOME}"/bin/hdfs ]; then
+      exec "${HADOOP_HOME}"/bin/hdfs $*
+    else
+      echo "HDFS not found."
+      exit
+    fi
+    ;;
+
+  #mapred commands  
+  mradmin|jobtracker|tasktracker|pipes|job|queue)
+    echo "DEPRECATED: Use of this script to execute mapred command is deprecated."
+    echo "Instead use the mapred command for it."
+    echo ""
+    #try to locate mapred and if present, delegate to it.
+    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
+      exec "${HADOOP_MAPRED_HOME}"/bin/mapred $*
+    elif [ -f "${HADOOP_HOME}"/bin/mapred ]; then
+      exec "${HADOOP_HOME}"/bin/mapred $* 
+    else
+      echo "MAPRED not found."
+      exit
+    fi
+    ;;
+
+  classpath)
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    echo $CLASSPATH
+    exit
+    ;;
+
+  #core commands  
+  *)
+    # the core commands
+    if [ "$COMMAND" = "fs" ] ; then
+      CLASS=org.apache.hadoop.fs.FsShell
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "version" ] ; then
+      CLASS=org.apache.hadoop.util.VersionInfo
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "jar" ] ; then
+      CLASS=org.apache.hadoop.util.RunJar
+    elif [ "$COMMAND" = "distcp" ] ; then
+      CLASS=org.apache.hadoop.tools.DistCp
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "daemonlog" ] ; then
+      CLASS=org.apache.hadoop.log.LogLevel
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "archive" ] ; then
+      CLASS=org.apache.hadoop.tools.HadoopArchives
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    else
+      CLASS=$COMMAND
+    fi
+    shift
+    
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    export CLASSPATH=$CLASSPATH
+    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+    ;;
+
+esac
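
For orientation, a minimal sketch of how the dispatch above plays out (myapp.jar and com.example.MyTool are placeholder names, not part of this commit):

   # "fs" maps to org.apache.hadoop.fs.FsShell
   bin/hadoop fs -ls /
   # "jar" maps to org.apache.hadoop.util.RunJar and runs a user jar
   bin/hadoop jar myapp.jar com.example.MyTool input output
   # "classpath" prints the CLASSPATH assembled by hadoop-config.sh
   bin/hadoop classpath
   # any other word is treated as a class name and run directly
   bin/hadoop org.apache.hadoop.util.VersionInfo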

+ 331 - 0
common/bin/hadoop-config.sh

@@ -0,0 +1,331 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# included in all the hadoop scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+
+# resolve links - $0 may be a softlink
+
+this="${BASH_SOURCE-$0}"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+common_bin=`dirname "$this"`
+script=`basename "$this"`
+common_bin=`cd "$common_bin"; pwd`
+this="$common_bin/$script"
+
+# the root of the Hadoop installation
+#TODO: change the env variable when dir structure is changed
+export HADOOP_HOME=`dirname "$this"`/..
+export HADOOP_COMMON_HOME="${HADOOP_HOME}"
+#export HADOOP_HOME=`dirname "$this"`/../..
+#export HADOOP_COMMON_HOME="${HADOOP_COMMON_HOME:-`dirname "$this"`/..}"
+
+#check to see if the conf dir is given as an optional argument
+if [ $# -gt 1 ]
+then
+    if [ "--config" = "$1" ]
+	  then
+	      shift
+	      confdir=$1
+	      shift
+	      HADOOP_CONF_DIR=$confdir
+    fi
+fi
+ 
+# Allow alternate conf dir location.
+export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
+
+# User can specify hostnames or a file where the hostnames are (not both)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one variable HADOOP_SLAVES or " \
+    "HADOOP_SLAVE_NAME and not both."
+  exit 1
+fi
+
+# Process command line options that specify hosts or file with host
+# list
+if [ $# -gt 1 ]
+then
+    if [ "--hosts" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
+        shift
+    elif [ "--hostnames" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVE_NAMES=$1
+        shift
+    fi
+fi
+
+# User can specify hostnames or a file where the hostnames are (not both)
+# (same check as above but now we know it's command line options that cause
+# the problem)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one of --hosts or --hostnames options and not both."
+  exit 1
+fi
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HADOOP_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_COMMON_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/classes
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/classes
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/test/core/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/core/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_COMMON_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME
+fi
+for f in $HADOOP_COMMON_HOME/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_COMMON_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+for f in $HADOOP_COMMON_HOME/lib/jsp-2.1/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add user-specified CLASSPATH last
+if [ "$HADOOP_CLASSPATH" != "" ]; then
+  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
+fi
+
+# default log directory & file
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  HADOOP_LOG_DIR="$HADOOP_HOME/logs"
+fi
+if [ "$HADOOP_LOGFILE" = "" ]; then
+  HADOOP_LOGFILE='hadoop.log'
+fi
+
+# default policy file for service-level authorization
+if [ "$HADOOP_POLICYFILE" = "" ]; then
+  HADOOP_POLICYFILE="hadoop-policy.xml"
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_COMMON_HOME=`cygpath -w "$HADOOP_COMMON_HOME"`
+  HADOOP_LOG_DIR=`cygpath -w "$HADOOP_LOG_DIR"`
+  JAVA_LIBRARY_PATH=`cygpath -w "$JAVA_LIBRARY_PATH"`
+fi
+
+# setup 'java.library.path' for native-hadoop code if necessary
+
+if [ -d "${HADOOP_COMMON_HOME}/build/native" -o -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
+  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
+  
+  if [ -d "$HADOOP_COMMON_HOME/build/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+        JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
+    else
+        JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
+    fi
+  fi
+  
+  if [ -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
+    else
+      JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
+    fi
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
+fi
+
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_COMMON_HOME"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
+
+# put hdfs in classpath if present
+if [ "$HADOOP_HDFS_HOME" = "" ]; then
+  if [ -d "${HADOOP_HOME}/hdfs" ]; then
+    HADOOP_HDFS_HOME=$HADOOP_HOME/hdfs
+    #echo Found HDFS installed at $HADOOP_HDFS_HOME
+  fi
+fi
+
+if [ -d "${HADOOP_HDFS_HOME}" ]; then
+
+  if [ -d "$HADOOP_HDFS_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME
+  fi
+  
+  if [ -d "${HADOOP_HDFS_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_HDFS_HOME}/conf
+  fi
+  
+  for f in $HADOOP_HDFS_HOME/hadoop-hdfs-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+
+  # add libs to CLASSPATH
+  for f in $HADOOP_HDFS_HOME/lib/*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+  
+  if [ -d "$HADOOP_HDFS_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/build/classes
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"`
+fi
+
+# set mapred home if mapred is present
+if [ "$HADOOP_MAPRED_HOME" = "" ]; then
+  if [ -d "${HADOOP_HOME}/mapred" ]; then
+    HADOOP_MAPRED_HOME=$HADOOP_HOME/mapred
+    #echo Found MAPRED installed at $HADOOP_MAPRED_HOME
+  fi
+fi
+
+if [ -d "${HADOOP_MAPRED_HOME}" ]; then
+
+  if [ -d "$HADOOP_MAPRED_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME
+  fi
+
+  if [ -d "${HADOOP_MAPRED_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_MAPRED_HOME}/conf
+  fi
+  
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapred-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f
+  done
+
+  for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
+    CLASSPATH=${CLASSPATH}:$f
+  done
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
+  fi
+
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapred-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+  for f in $HADOOP_MAPRED_HOME/build/hadoop-mapred-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_MAPRED_HOME=`cygpath -w "$HADOOP_MAPRED_HOME"`
+  TOOL_PATH=`cygpath -p -w "$TOOL_PATH"`
+fi
+
+
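
To make the option handling above concrete, a hedged sketch of a caller's command line as consumed when this file is sourced (the conf directory and host file are placeholders; --config must appear before --hosts or --hostnames):

   # host list is resolved as ${HADOOP_CONF_DIR}/workers.txt
   bin/hadoop-daemons.sh --config /etc/hadoop/conf --hosts workers.txt start datanode
   # or name the hosts inline; the two forms are mutually exclusive
   bin/hadoop-daemons.sh --config /etc/hadoop/conf --hostnames "node1 node2" start datanode
   # HADOOP_HEAPSIZE (in MB) overrides the default -Xmx1000m
   HADOOP_HEAPSIZE=2000 bin/hadoop --config /etc/hadoop/conf version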

+ 167 - 0
common/bin/hadoop-daemon.sh

@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Runs a Hadoop command as a daemon.
+#
+# Environment Variables
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_LOG_DIR   Where log files are stored.  Defaults to ${HADOOP_HOME}/logs.
+#   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
+#   HADOOP_PID_DIR   Where the pid files are stored. /tmp by default.
+#   HADOOP_IDENT_STRING   A string representing this instance of hadoop. $USER by default
+#   HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
+##
+
+usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>"
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# get arguments
+
+#default value
+hadoopScript="$HADOOP_HOME"/bin/hadoop
+if [ "--script" = "$1" ]
+  then
+    shift
+    hadoopScript=$1
+    shift
+fi
+startStop=$1
+shift
+command=$1
+shift
+
+hadoop_rotate_log ()
+{
+    log=$1;
+    num=5;
+    if [ -n "$2" ]; then
+	num=$2
+    fi
+    if [ -f "$log" ]; then # rotate logs
+	while [ $num -gt 1 ]; do
+	    prev=`expr $num - 1`
+	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+	    num=$prev
+	done
+	mv "$log" "$log.$num";
+    fi
+}
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
+if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
+  export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
+  export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER   
+fi
+
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+  export HADOOP_IDENT_STRING="$USER"
+fi
+
+
+# get log directory
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  export HADOOP_LOG_DIR="$HADOOP_HOME/logs"
+fi
+mkdir -p "$HADOOP_LOG_DIR"
+chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
+
+if [ "$HADOOP_PID_DIR" = "" ]; then
+  HADOOP_PID_DIR=/tmp
+fi
+
+# some variables
+export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
+export HADOOP_ROOT_LOGGER="INFO,DRFA"
+log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
+pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
+
+# Set default scheduling priority
+if [ "$HADOOP_NICENESS" = "" ]; then
+    export HADOOP_NICENESS=0
+fi
+
+case $startStop in
+
+  (start)
+
+    mkdir -p "$HADOOP_PID_DIR"
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo $command running as process `cat $pid`.  Stop it first.
+        exit 1
+      fi
+    fi
+
+    if [ "$HADOOP_MASTER" != "" ]; then
+      echo rsync from $HADOOP_MASTER
+      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_HOME"
+    fi
+
+    hadoop_rotate_log $log
+    echo starting $command, logging to $log
+    cd "$HADOOP_HOME"
+    nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
+    echo $! > $pid
+    sleep 1; head "$log"
+    sleep 3;
+    if ! ps -p $! > /dev/null ; then
+      exit 1
+    fi
+    ;;
+          
+  (stop)
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo stopping $command
+        kill `cat $pid`
+      else
+        echo no $command to stop
+        exit 1
+      fi
+    else
+      echo no $command to stop
+      exit 1
+    fi
+    ;;
+
+  (*)
+    echo $usage
+    exit 1
+    ;;
+
+esac
+
+
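
An illustrative start/stop cycle (the conf directory is a placeholder; the daemon name is simply handed to bin/hadoop, or to whatever --script names):

   # forks the command via nohup/nice, rotates old .out files, writes a pid file
   bin/hadoop-daemon.sh --config /etc/hadoop/conf start namenode
   # stops it again using the pid recorded under HADOOP_PID_DIR
   bin/hadoop-daemon.sh --config /etc/hadoop/conf stop namenode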

+ 34 - 0
common/bin/hadoop-daemons.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a Hadoop command on all slave hosts.
+
+usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. $bin/hadoop-config.sh
+
+exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_HOME" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

+ 99 - 0
common/bin/rcc

@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The Hadoop record compiler
+#
+# Environment Variables
+#
+#   JAVA_HOME        The java implementation to use.
+#
+#   HADOOP_OPTS      Extra Java runtime options.
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
+fi
+if [ -d "$HADOOP_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
+fi
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
+fi
+for f in $HADOOP_HOME/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+
+CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
+
+# cygwin path translation
+if expr `uname` : 'CYGWIN*' > /dev/null; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+fi
+
+# run it
+exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

+ 65 - 0
common/bin/slaves.sh

@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a shell command on all slave hosts.
+#
+# Environment Variables
+#
+#   HADOOP_SLAVES    File naming remote hosts.
+#     Default is ${HADOOP_CONF_DIR}/slaves.
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+##
+
+usage="Usage: slaves.sh [--config confdir] command..."
+
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Determine which slave hosts to run on; see hadoop-config.sh
+# (it sets up these variables based on the command line options)
+if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
+  SLAVE_NAMES=$HADOOP_SLAVE_NAMES
+else
+  SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed  's/#.*$//;/^$/d')
+fi
+
+# start the daemons
+for slave in $SLAVE_NAMES ; do
+ ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
+   2>&1 | sed "s/^/$slave: /" &
+ if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
+   sleep $HADOOP_SLAVE_SLEEP
+ fi
+done
+
+wait
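
An illustrative run, assuming ${HADOOP_CONF_DIR}/slaves lists one host per line (hostnames and SSH options are placeholders):

   # run "uptime" on every slave host in parallel
   HADOOP_SSH_OPTS="-o ConnectTimeout=5" bin/slaves.sh --config /etc/hadoop/conf uptime
   # each output line is prefixed with the host it came from, e.g. "node1: ..."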

+ 36 - 0
common/bin/start-all.sh

@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# start hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# start mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh --config $HADOOP_CONF_DIR
+fi

+ 37 - 0
common/bin/stop-all.sh

@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# stop hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# stop mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh --config $HADOOP_CONF_DIR
+fi
+

+ 1769 - 0
common/build.xml

@@ -0,0 +1,1769 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="Hadoop-Common" default="compile" 
+   xmlns:ivy="antlib:org.apache.ivy.ant"
+   xmlns:artifact="urn:maven-artifact-ant"> 
+
+  <!-- Load all the default properties, and any the user wants    -->
+  <!-- to contribute (without having to type -D or edit this file -->
+  <property file="${user.home}/build.properties" />
+  <property file="${basedir}/build.properties" />
+ 
+  <property name="Name" value="Hadoop-common"/>
+  <property name="name" value="hadoop-common"/>
+  <property name="version" value="0.22.0-SNAPSHOT"/>
+  <property name="final.name" value="${name}-${version}"/>
+  <property name="test.final.name" value="${name}-test-${version}"/>
+  <property name="year" value="2009"/>
+
+  <property name="src.dir" value="${basedir}/src"/>  	
+  <property name="java.src.dir" value="${src.dir}/java"/>
+  <property name="native.src.dir" value="${basedir}/src/native"/>
+
+  <property name="lib.dir" value="${basedir}/lib"/>
+  <property name="conf.dir" value="${basedir}/conf"/>
+  <property name="contrib.dir" value="${basedir}/src/contrib"/>
+  <property name="docs.src" value="${basedir}/src/docs"/>
+  <property name="src.docs.cn" value="${basedir}/src/docs/cn"/>
+  <property name="changes.src" value="${docs.src}/changes"/>
+
+  <property name="build.dir" value="${basedir}/build"/>
+  <property name="build.classes" value="${build.dir}/classes"/>
+  <property name="build.src" value="${build.dir}/src"/>
+  <property name="build.webapps" value="${build.dir}/webapps"/>
+
+  <!-- convert spaces to _ so that mac os doesn't break things -->
+  <exec executable="tr" inputstring="${os.name}" 
+        outputproperty="nonspace.os">
+     <arg value="[:space:]"/>
+     <arg value="_"/>
+  </exec>
+  <property name="build.platform" 
+            value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
+  <property name="jvm.arch" 
+            value="${sun.arch.data.model}"/>
+  <property name="build.native" value="${build.dir}/native/${build.platform}"/>
+  <property name="build.docs" value="${build.dir}/docs"/>
+  <property name="build.docs.cn" value="${build.dir}/docs/cn"/>
+  <property name="build.javadoc" value="${build.docs}/api"/>
+  <property name="build.javadoc.timestamp" value="${build.javadoc}/index.html" />
+  <property name="build.javadoc.dev" value="${build.docs}/dev-api"/>
+  <property name="build.encoding" value="ISO-8859-1"/>
+  <property name="install.c++" value="${build.dir}/c++/${build.platform}"/>
+
+  <property name="test.src.dir" value="${basedir}/src/test"/>
+  <property name="test.build.dir" value="${build.dir}/test"/>
+  <property name="test.generated.dir" value="${test.build.dir}/src"/>
+  <property name="test.build.data" value="${test.build.dir}/data"/>
+  <property name="test.cache.data" value="${test.build.dir}/cache"/>
+  <property name="test.debug.data" value="${test.build.dir}/debug"/>
+  <property name="test.log.dir" value="${test.build.dir}/logs"/>
+  <property name="test.build.classes" value="${test.build.dir}/classes"/>
+  <property name="test.build.extraconf" value="${test.build.dir}/extraconf"/>
+  <property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
+  <property name="test.build.javadoc.dev" value="${test.build.dir}/docs/dev-api"/>
+  <property name="test.build.webapps" value="${build.dir}/test/webapps"/>
+  <property name="test.include" value="Test*"/>
+  <property name="test.classpath.id" value="test.classpath"/>
+  <property name="test.output" value="no"/>
+  <property name="test.timeout" value="900000"/>
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="test.junit.fork.mode" value="perTest" />
+  <property name="test.junit.printsummary" value="yes" />
+  <property name="test.junit.haltonfailure" value="no" />
+  <property name="test.junit.maxmemory" value="512m" />
+  <property name="test.conf.dir" value="${build.dir}/test/conf" />
+
+  <property name="test.core.build.classes" value="${test.build.dir}/core/classes"/>
+
+  <property name="test.all.tests.file" value="${test.src.dir}/all-tests"/>
+
+  <property name="javadoc.link.java"
+	    value="http://java.sun.com/javase/6/docs/api/"/>
+  <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+  <property name="javadoc.maxmemory" value="512m" />
+
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+
+  <property name="javac.debug" value="on"/>
+  <property name="javac.optimize" value="on"/>
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.version" value="1.6"/>
+  <property name="javac.args" value=""/>
+  <property name="javac.args.warnings" value="-Xlint:unchecked"/>
+
+  <property name="clover.db.dir" location="${build.dir}/test/clover/db"/>
+  <property name="clover.report.dir" location="${build.dir}/test/clover/reports"/>
+
+  <property name="rat.reporting.classname" value="rat.Report"/>
+
+  <property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
+  <property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
+  <property name="jdiff.stability" value="-unstable"/>
+  <property name="jdiff.compatibility" value=""/>
+  <property name="jdiff.stable" value="0.20.2"/>
+  <property name="jdiff.stable.javadoc" 
+            value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
+
+  <property name="scratch.dir" value="${user.home}/tmp"/>
+  <property name="svn.cmd" value="svn"/>
+  <property name="grep.cmd" value="grep"/>
+  <property name="patch.cmd" value="patch"/>
+  <property name="make.cmd" value="make"/>
+
+	
+  <!-- IVY properties set here -->
+  <property name="ivy.repo.dir" value="${user.home}/ivyrepo" />
+  <property name="ivy.dir" location="ivy" />
+  <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <property name="asfrepo" value="https://repository.apache.org"/> 
+  <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
+  <property name="asfstagingrepo"
+  value="${asfrepo}/service/local/staging/deploy/maven2"/>
+  <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+  <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+  <property name="ant_task.jar" location="${ivy.dir}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ant_task_repo_url" 
+     value="${mvnrepo}/org/apache/maven/maven-ant-tasks/${ant-task.version}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" />
+  <property name="ivy.org" value="org.apache.hadoop"/>
+  <property name="build.dir" location="build" />
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+  <property name="build.ivy.dir" location="${build.dir}/ivy" />
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report"/>
+  <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"/>
+  <property name="pom.xml" location="${build.ivy.maven.dir}/pom.xml"/>
+  <property name="hadoop-common.pom" location="${ivy.dir}/hadoop-common.xml"/>
+  <property name="build.ivy.maven.common.jar" location="${build.ivy.maven.dir}/hadoop-common-${version}.jar"/>
+  <property name="hadoop-common-test.pom" location="${ivy.dir}/hadoop-common-test.xml" />
+  <property name="build.ivy.maven.common-test.jar" location="${build.ivy.maven.dir}/hadoop-common-test-${version}.jar"/>
+
+  <!--this is the naming policy for artifacts we want pulled down-->
+  <property name="ivy.module" location="hadoop-common" />
+  <property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+
+  <!--this is how artifacts that get built are named-->
+  <property name="ivy.publish.pattern" value="[artifact]-[revision].[ext]"/>
+  <property name="hadoop-common.jar" location="${build.dir}/${final.name}.jar" />
+  <property name="hadoop-common-test.jar" location="${build.dir}/${test.final.name}.jar" />
+  <property name="hadoop-common-sources.jar" location="${build.dir}/${final.name}-sources.jar" />
+  <property name="hadoop-common-test-sources.jar" location="${build.dir}/${test.final.name}-sources.jar" />
+
+  <!-- jdiff.home property set -->
+  <property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
+  <property name="jdiff.jar" value="${jdiff.home}/jdiff-${jdiff.version}.jar"/>
+  <property name="xerces.jar" value="${jdiff.home}/xerces-${xerces.version}.jar"/>
+
+  <property name="clover.jar" location="${clover.home}/lib/clover.jar"/>
+  <available property="clover.present" file="${clover.jar}" />
+	
+  <!-- Eclipse properties -->
+  <property name="build.dir.eclipse" value="build/eclipse"/>
+  <property name="build.dir.eclipse-main-classes" value="${build.dir.eclipse}/classes-main"/>
+  <property name="build.dir.eclipse-test-classes" value="${build.dir.eclipse}/classes-test"/>
+  <property name="build.dir.eclipse-test-generated-classes" value="${build.dir.eclipse}/classes-test-generated"/>
+
+  <!-- check if clover reports should be generated -->
+  <condition property="clover.enabled">
+    <and>
+        <isset property="run.clover"/>
+        <isset property="clover.present"/>
+    </and>
+  </condition>
+
+  <condition property="staging">
+     <equals arg1="${repo}" arg2="staging"/>
+  </condition>
+
+<!-- the normal classpath -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${conf.dir}"/>
+    <path refid="ivy-common.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <pathelement location="${test.build.extraconf}"/>
+    <pathelement location="${test.core.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${test.build.dir}"/>
+    <pathelement location="${build.dir}"/>
+    <pathelement location="${build.examples}"/>
+    <pathelement location="${build.tools}"/>
+    <pathelement path="${clover.jar}"/>
+    <path refid="ivy-common.classpath"/>
+    <path refid="ivy-test.classpath"/>
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${test.conf.dir}"/>
+  </path>
+<!--
+  <path id="test.hdfs.classpath">
+    <pathelement location="${test.hdfs.build.classes}" />
+    <path refid="test.classpath"/>
+  </path>
+
+  <path id="test.mapred.classpath">
+    <pathelement location="${test.mapred.build.classes}" />
+    <path refid="test.hdfs.classpath"/>
+  </path>
+
+  <path id="test.hdfs.with.mr.classpath">
+    <pathelement location="${test.hdfs.with.mr.build.classes}" />
+    <path refid="test.mapred.classpath"/>
+  </path>
+-->
+  <!-- the cluster test classpath: uses conf.dir for configuration -->
+  <path id="test.cluster.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${test.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${build.dir}"/>
+  </path>
+
+
+  <!-- ====================================================== -->
+  <!-- Macro definitions                                      -->
+  <!-- ====================================================== -->
+  <macrodef name="macro_tar" description="Worker Macro for tar">
+    <attribute name="param.destfile"/>
+    <element name="param.listofitems"/>
+    <sequential>
+      <tar compression="gzip" longfile="gnu"
+      destfile="@{param.destfile}">
+      <param.listofitems/>
+      </tar>
+    </sequential>
+  </macrodef>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init" depends="ivy-retrieve-common">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.src}"/>
+    <mkdir dir="${build.webapps}"/>
+ 
+    <mkdir dir="${test.build.dir}"/>
+    <mkdir dir="${test.build.classes}"/>
+    <mkdir dir="${test.build.extraconf}"/>
+    <tempfile property="touch.temp.file" destDir="${java.io.tmpdir}"/>
+    <touch millis="0" file="${touch.temp.file}">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <fileset dir="${contrib.dir}" includes="**/*.template"/>
+    </touch>
+    <delete file="${touch.temp.file}"/>
+    <!-- copy all of the jsp and static files -->
+
+    <copy todir="${conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <mkdir dir="${test.conf.dir}"/>
+    <copy todir="${test.conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <copy todir="${contrib.dir}" verbose="true">
+      <fileset dir="${contrib.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <exec executable="sh">
+       <arg line="src/saveVersion.sh ${version} ${build.dir}"/>
+    </exec>
+	
+   <exec executable="sh">
+       <arg line="src/fixFontsPath.sh ${src.docs.cn}"/>
+   </exec>
+  </target>
+
+  <import file="${test.src.dir}/aop/build/aop.xml"/>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <target name="record-parser" depends="init" if="javacc.home">
+      <javacc
+          target="${java.src.dir}/org/apache/hadoop/record/compiler/generated/rcc.jj"
+          outputdirectory="${java.src.dir}/org/apache/hadoop/record/compiler/generated"
+          javacchome="${javacc.home}" />
+  </target>
+  
+  <target name="compile-rcc-compiler" depends="init, record-parser">
+    <javac 
+        encoding="${build.encoding}" 
+        srcdir="${java.src.dir}"
+        includes="org/apache/hadoop/record/compiler/**/*.java"
+        destdir="${build.classes}"
+        debug="${javac.debug}"
+        optimize="${javac.optimize}"
+        target="${javac.version}"
+        source="${javac.version}"
+        deprecation="${javac.deprecation}">
+        <compilerarg line="${javac.args}"/>
+        <classpath refid="classpath"/>
+    </javac>
+    
+    <taskdef name="recordcc" classname="org.apache.hadoop.record.compiler.ant.RccTask">
+      <classpath refid="classpath" />
+    </taskdef>
+  </target>
+  
+  <target name="compile-core-classes" depends="init, compile-rcc-compiler">
+    <!-- Compile Java files (excluding JSPs) checking warnings -->
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${java.src.dir};${build.src}"	
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="classpath"/>
+    </javac>
+
+    <copy todir="${build.classes}">
+      <fileset dir="${java.src.dir}" includes="**/*.properties"/>
+      <fileset dir="${java.src.dir}" includes="core-default.xml"/>
+    </copy>
+     
+  </target>
+
+  <target name="compile-native">
+    <antcall target="compile-core-native">
+      <param name="compile.native" value="true"/>
+    </antcall> 
+  </target>
+
+  <target name="compile-core-native" depends="compile-core-classes"
+          if="compile.native">
+  	
+    <mkdir dir="${build.native}/lib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/nativeio"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/security"/>
+
+  	<javah 
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/compress/zlib"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.compress.zlib.ZlibCompressor" />
+      <class name="org.apache.hadoop.io.compress.zlib.ZlibDecompressor" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsMapping" />
+  	</javah>
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/nativeio"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.nativeio.NativeIO" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping" />
+  	</javah>
+
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <env key="OS_NAME" value="${os.name}"/>
+	  <env key="OS_ARCH" value="${os.arch}"/>
+	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+	  <arg line="${native.src.dir}/configure"/>
+    </exec>
+
+    <exec dir="${build.native}" executable="${make.cmd}" failonerror="true">
+      <env key="OS_NAME" value="${os.name}"/>
+      <env key="OS_ARCH" value="${os.arch}"/>
+  	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+  	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+    </exec>
+
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <arg line="${build.native}/libtool --mode=install cp ${build.native}/libhadoop.la ${build.native}/lib"/>
+    </exec>
+
+  </target>
+
+  <target name="compile-core"
+          depends="clover,compile-core-classes,
+  	compile-core-native" 
+  	description="Compile core only">
+  </target>
+
+  <target name="compile-contrib" depends="compile-core">
+     <subant target="compile">
+        <property name="version" value="${version}"/>
+        <fileset file="${contrib.dir}/build.xml"/>
+     </subant>  	
+  </target>
+  
+  <target name="compile" depends="compile-core, compile-contrib" description="Compile core, contrib">
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-common.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile-core" description="Make hadoop-common.jar">
+    <tar compression="gzip" destfile="${build.classes}/bin.tgz">
+      <tarfileset dir="bin" mode="755"/>
+    </tar>
+    <property name="jar.properties.list" value="commons-logging.properties, log4j.properties, hadoop-metrics.properties" />
+    <jar jarfile="${build.dir}/${final.name}.jar"
+         basedir="${build.classes}">
+      <manifest>
+        <section name="org/apache/hadoop">
+          <attribute name="Implementation-Title" value="${ant.project.name}"/>
+          <attribute name="Implementation-Version" value="${version}"/>
+          <attribute name="Implementation-Vendor" value="Apache"/>
+        </section>
+      </manifest>
+      <fileset dir="${conf.dir}" includes="${jar.properties.list}" />
+      <fileset file="${jar.extra.properties.list}" />
+    </jar>
+
+    <jar jarfile="${hadoop-common-sources.jar}">
+      <fileset dir="${java.src.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${build.src}" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make the Hadoop metrics jar. (for use outside Hadoop)              -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="metrics.jar" depends="compile-core" description="Make the Hadoop metrics jar. (for use outside Hadoop)">
+    <jar jarfile="${build.dir}/hadoop-metrics-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics/**" />
+      <exclude name="**/package.html" />
+    </jar>
+  </target>
+
+  <target name="metrics2.jar" depends="compile-core" description="Make the Hadoop metrics2 framework jar (for use plugin development)">
+    <jar jarfile="${build.dir}/hadoop-metrics2-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics2/**" />
+    </jar>
+  </target>
+
+  <target name="generate-test-records" depends="compile-rcc-compiler">
+    <recordcc destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}"
+	         includes="**/*.jr" />
+    </recordcc>
+  </target>
+
+  <target name="generate-avro-records" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.SchemaTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avsc" />
+      </fileset>
+    </schema>
+  </target>
+
+  <target name="generate-avro-protocols" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.ProtocolTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avpr" />
+     </fileset>
+    </schema>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  --> 
+  <!-- ================================================================== -->
+  <!-- This is a wrapper for fault-injection needs-->
+  <target name="-classes-compilation"
+    depends="compile-core-classes, compile-core-test"/> 
+
+  <target name="compile-core-test" depends="compile-core-classes, ivy-retrieve-test, generate-test-records, generate-avro-records, generate-avro-protocols">
+    <mkdir dir="${test.core.build.classes}"/>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.generated.dir}"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args}"/>
+      <classpath refid="test.classpath"/>
+    </javac>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.src.dir}/core"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="test.classpath"/>
+     </javac>
+
+    <taskdef
+       name="paranamer" 
+       classname="com.thoughtworks.paranamer.ant.ParanamerGeneratorTask">
+      <classpath refid="classpath" />
+    </taskdef>
+    <paranamer sourceDirectory="${test.src.dir}/core"
+	       outputDirectory="${test.core.build.classes}"/>
+
+    <delete dir="${test.cache.data}"/>
+    <mkdir dir="${test.cache.data}"/>
+    <copy file="${test.src.dir}/core/org/apache/hadoop/cli/testConf.xml" todir="${test.cache.data}"/>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-test.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar-test" depends="compile-core-test" description="Make hadoop-test.jar">
+    <copy todir="${test.build.classes}">
+      <fileset dir="${test.core.build.classes}"/>
+    </copy>
+    <jar jarfile="${build.dir}/${test.final.name}.jar"
+         basedir="${test.build.classes}">
+         <manifest>
+           <attribute name="Main-Class"
+                      value="org/apache/hadoop/test/CoreTestDriver"/>
+          <section name="org/apache/hadoop">
+            <attribute name="Implementation-Title" value="${ant.project.name}"/>
+            <attribute name="Implementation-Version" value="${version}"/>
+            <attribute name="Implementation-Vendor" value="Apache"/>
+          </section>
+         </manifest>
+    </jar>
+
+    <jar jarfile="${hadoop-common-test-sources.jar}">
+      <fileset dir="${test.generated.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${test.src.dir}/core" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Fault injection customization section.
+       These targets ought to be copied over to other projects and modified
+       as needed -->
+  <!-- ================================================================== -->
+  <target name="run-test-core-fault-inject" depends="injectfaults" 
+	  description="Run full set of the unit tests with fault injection">
+    <macro-run-tests-fault-inject target.name="run-test-core"
+      testcasesonly="false"/>
+  </target>
+
+  <target name="jar-test-fault-inject" depends="injectfaults" 
+    description="Make hadoop-test-fi.jar">
+    <macro-jar-test-fault-inject
+      target.name="jar-test"
+      jar.final.name="test.final.name"
+      jar.final.value="${test.final.name}-fi" />
+  </target>
+
+  <target name="jar-fault-inject" depends="injectfaults" 
+    description="Make hadoop-fi.jar">
+    <macro-jar-fault-inject
+      target.name="jar"
+      build.dir="${build-fi.dir}"
+      jar.final.name="final.name"
+      jar.final.value="${final.name}-fi" />
+  </target>
+
+  <!--This target is not included in the top-level list of targets because
+  it serves a special "regression" purpose: running non-FI tests in an
+  FI environment -->
+  <target name="run-fault-inject-with-testcaseonly" depends="injectfaults">
+    <fail unless="testcase">Can't run this target without -Dtestcase setting!
+    </fail>
+    <macro-run-tests-fault-inject target.name="run-test-core" 
+      testcasesonly="true"/>
+  </target>
+  <!-- ================================================================== -->
+  <!-- End of Fault injection customization section                       -->
+  <!-- ================================================================== -->
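
For orientation, a hedged sketch of how the fault-injection targets above are typically driven from the command line; the test class name is a placeholder, and the injectfaults dependency is assumed to be defined earlier in this build file:

    # Placeholder test name; any core test class can be substituted.
    ant run-test-core-fault-inject                                  # full unit-test run with faults injected
    ant jar-test-fault-inject                                       # build hadoop-test-fi.jar
    ant jar-fault-inject                                            # build hadoop-fi.jar
    ant run-fault-inject-with-testcaseonly -Dtestcase=TestSomeClass # run a non-FI test in the FI environment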
+
+  <condition property="tests.notestcase">
+    <and>
+      <isfalse value="${test.fault.inject}"/>
+      <not>
+        <isset property="testcase"/>
+      </not>
+    </and>
+  </condition>
+  <condition property="tests.notestcase.fi">
+    <and>
+      <not>
+        <isset property="testcase" />
+      </not>
+      <istrue value="${test.fault.inject}" />
+    </and>
+  </condition>
+  <condition property="tests.testcase">
+    <and>
+      <isfalse value="${test.fault.inject}" />
+      <isset property="testcase" />
+    </and>
+  </condition>
+  <condition property="tests.testcaseonly">
+    <istrue value="${special.fi.testcasesonly}" />
+  </condition>
+  <condition property="tests.testcase.fi">
+    <and>
+      <istrue value="${test.fault.inject}" />
+      <isset property="testcase" />
+      <isfalse value="${special.fi.testcasesonly}" />
+    </and>
+  </condition>
+	     
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <macrodef name="macro-test-runner">
+    <attribute name="test.file" />
+    <attribute name="classpath" />
+    <attribute name="test.dir" />
+    <attribute name="fileset.dir" />
+    <attribute name="hadoop.conf.dir.deployed" default="" />
+    <attribute name="test.krb5.conf.filename" default="" />
+    <sequential>
+      <delete file="${test.build.dir}/testsfailed"/>
+      <delete dir="@{test.dir}/data" />
+      <mkdir dir="@{test.dir}/data" />
+      <delete dir="${test.build.webapps}"/>
+      <copy todir="${test.build.webapps}">
+        <fileset dir="${test.src.dir}/test-webapps" includes="**/*" />
+      </copy>
+      <delete dir="@{test.dir}/logs" />
+      <mkdir dir="@{test.dir}/logs" />
+      <copy file="${test.src.dir}/hadoop-policy.xml"
+            todir="@{test.dir}/extraconf" />
+      <copy file="${test.src.dir}/fi-site.xml"
+            todir="@{test.dir}/extraconf" />
+      <junit showoutput="${test.output}"
+             printsummary="${test.junit.printsummary}"
+             haltonfailure="${test.junit.haltonfailure}"
+             fork="yes"
+             forkmode="${test.junit.fork.mode}"
+             maxmemory="${test.junit.maxmemory}"
+             dir="${basedir}"
+             timeout="${test.timeout}"
+             errorProperty="tests.failed"
+             failureProperty="tests.failed">
+        <jvmarg value="-ea" />
+        <sysproperty key="test.build.data" value="${test.build.data}" />
+        <sysproperty key="test.cache.data" value="${test.cache.data}" />
+        <sysproperty key="test.debug.data" value="${test.debug.data}" />
+        <sysproperty key="hadoop.log.dir" value="${test.log.dir}" />
+        <sysproperty key="test.src.dir" value="${test.src.dir}" />
+        <sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
+         <sysproperty key="java.security.krb5.conf" value="@{test.krb5.conf.filename}"/>
+        <sysproperty key="hadoop.policy.file" value="hadoop-policy.xml" />
+        <sysproperty key="java.library.path"
+          value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
+        <sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
+        <!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="io.compression.codec.lzo.class"/>
+        </syspropertyset>
+        <!-- set compile.c++ in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="compile.c++"/>
+        </syspropertyset>
+        <classpath refid="@{classpath}" />
+        <!-- Pass probability specifications to the spawned JVM -->
+        <syspropertyset id="FaultProbabilityProperties">
+          <propertyref regex="fi.*"/>
+        </syspropertyset>
+        <sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
+                     value="@{hadoop.conf.dir.deployed}" />
+        <!-- user to group mapping class for TestAccessControlList -->
+        <syspropertyset dynamic="no">
+          <propertyref name="TestAccessControlListGroupMapping"/>
+        </syspropertyset>
+        <formatter type="${test.junit.output.format}" />
+        <batchtest todir="@{test.dir}" if="tests.notestcase">
+          <fileset dir="@{fileset.dir}/core"
+                   excludes="**/${test.exclude}.java aop/** system/**">
+             <patternset>
+               <includesfile name="@{test.file}"/>
+             </patternset>
+         </fileset>
+        </batchtest>
+        <batchtest todir="${test.build.dir}" if="tests.notestcase.fi">
+          <fileset dir="@{fileset.dir}/aop"
+                   includes="**/${test.include}.java"
+                   excludes="**/${test.exclude}.java" />
+         </batchtest>
+         <batchtest todir="@{test.dir}" if="tests.testcase">
+           <fileset dir="@{fileset.dir}/core"
+             includes="**/${testcase}.java" excludes="aop/** system/**"/>
+         </batchtest>
+         <batchtest todir="${test.build.dir}" if="tests.testcase.fi">
+           <fileset dir="@{fileset.dir}/aop" includes="**/${testcase}.java" />
+         </batchtest>
+         <!--The following batch is for the special case when non-FI tests
+                need to be executed against an FI environment -->
+         <batchtest todir="${test.build.dir}" if="tests.testcaseonly">
+           <fileset dir="@{fileset.dir}/core" includes="**/${testcase}.java" />
+         </batchtest>
+      </junit>
+      <antcall target="checkfailure"/>
+    </sequential>
+  </macrodef>
+
+  <target name="run-test-core" depends="compile-core-test" description="Run core unit tests">
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>   
+
+  <target name="checkfailure" if="tests.failed">
+    <touch file="${test.build.dir}/testsfailed"/>
+    <fail unless="continueOnFailure">Tests failed!</fail>
+  </target>
+
+  <target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
+    <subant target="test">
+       <property name="version" value="${version}"/>
+       <property name="clover.jar" value="${clover.jar}"/>
+       <fileset file="${contrib.dir}/build.xml"/>
+    </subant> 
+  </target>
+
+  <target name="test-core" description="Run core unit tests"
+          depends="run-test-core"/>
+
+  <target name="test-fi" description="Run fi unit tests"
+          depends="run-test-core-fault-inject"/>
+
+  <target name="test" depends="jar-test" description="Run all unit tests">
+    <delete file="${test.build.dir}/testsfailed"/> 
+    <property name="continueOnFailure" value="true"/> 
+    <antcall target="run-test-core"/>
+    <antcall target="run-test-core-fault-inject"/>
+    <subant target="test-contrib">	 
+      <fileset dir="." includes="build.xml"/>
+    </subant>
+    <available file="${test.build.dir}/testsfailed" property="testsfailed"/>
+    <fail if="testsfailed">Tests failed!</fail>
+  </target>
+
+  <!-- Run all unit tests, not just Test*, and use non-test configuration. -->
+  <target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
+    <antcall target="test">
+      <param name="test.include" value="*"/>
+      <param name="test.classpath.id" value="test.cluster.classpath"/>
+    </antcall>
+  </target>
+
+  <target name="nightly" depends="test, tar">
+  </target>
+	
+  <!-- ================================================================== -->
+  <!-- Run optional third-party tool targets                              --> 
+  <!-- ================================================================== -->
+  <target name="checkstyle" depends="ivy-retrieve-checkstyle,check-for-checkstyle" if="checkstyle.present" 
+       description="Run optional third-party tool targets">
+       <taskdef resource="checkstyletask.properties">
+         <classpath refid="checkstyle-classpath"/>
+       </taskdef>
+  
+	<mkdir dir="${test.build.dir}"/>
+  	
+  	<checkstyle config="${test.src.dir}/checkstyle.xml"
+  		failOnViolation="false">
+      <fileset dir="${java.src.dir}" includes="**/*.java" excludes="**/generated/**"/>
+      <formatter type="xml" toFile="${test.build.dir}/checkstyle-errors.xml"/>
+  	</checkstyle>
+  	
+  	<xslt style="${test.src.dir}/checkstyle-noframes-sorted.xsl"
+        in="${test.build.dir}/checkstyle-errors.xml"
+        out="${test.build.dir}/checkstyle-errors.html"/>
+  </target>
+	
+  <target name="check-for-checkstyle">
+    <available property="checkstyle.present" resource="checkstyletask.properties">
+       <classpath refid="checkstyle-classpath"/>
+    </available>  	
+  </target>
+
+
+ <property name="findbugs.home" value=""/>
+  <target name="findbugs" depends="check-for-findbugs, jar" if="findbugs.present" description="Run findbugs if present">
+    <property environment="env"/>
+    <property name="findbugs.out.dir" value="${test.build.dir}/findbugs"/>
+    <property name="findbugs.exclude.file" value="${test.src.dir}/findbugsExcludeFile.xml"/>
+    <property name="findbugs.report.htmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.html"/>
+    <property name="findbugs.report.xmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.xml"/>
+    <taskdef name="findbugs" classname="edu.umd.cs.findbugs.anttask.FindBugsTask"
+        classpath="${findbugs.home}/lib/findbugs-ant.jar" />
+
+        <mkdir dir="${findbugs.out.dir}"/>
+
+    <findbugs home="${findbugs.home}" output="xml:withMessages"
+        outputFile="${findbugs.report.xmlfile}" effort="max"
+        excludeFilter="${findbugs.exclude.file}" jvmargs="-Xmx512M">
+      <auxClasspath>
+        <fileset dir="${env.ANT_HOME}/lib">
+          <include name="ant.jar"/>
+          <include name="ant-launcher.jar"/>
+        </fileset>
+        <fileset dir="${build.ivy.lib.dir}/${ant.project.name}/common">
+          <include name="**/*.jar"/>
+        </fileset>
+      </auxClasspath>
+      <sourcePath path="${java.src.dir}"/>
+      <class location="${basedir}/build/${final.name}.jar" />
+    </findbugs>
+
+        <xslt style="${findbugs.home}/src/xsl/default.xsl"
+        in="${findbugs.report.xmlfile}"
+        out="${findbugs.report.htmlfile}"/>
+  </target>
+	
+  <target name="check-for-findbugs">
+    <available property="findbugs.present"
+        file="${findbugs.home}/lib/findbugs.jar" />
+  </target>
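
A hedged example of running the findbugs target; the installation path is an assumption, and findbugs.home must point at a directory containing lib/findbugs.jar for check-for-findbugs to find it:

    # /opt/findbugs is a placeholder path to a local FindBugs installation.
    ant findbugs -Dfindbugs.home=/opt/findbugs
    # Reports are written under ${test.build.dir}/findbugs as hadoop-findbugs-report.xml/.html.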
+
+
+  <!-- ================================================================== -->
+  <!-- Documentation                                                      -->
+  <!-- ================================================================== -->
+  
+  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. 
+       To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+    <exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
+	  failonerror="true">
+      <env key="JAVA_HOME" value="${java5.home}"/>
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${docs.src}/build/site/" />
+    </copy>
+    <copy file="${docs.src}/releasenotes.html" todir="${build.docs}"/>
+    <style basedir="${java.src.dir}" destdir="${build.docs}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+    <antcall target="cn-docs"/>
+  </target>
+
+  <target name="cn-docs" depends="forrest.check, init" description="Generate forrest-based Chinese documentation. 
+        To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." 
+        if="forrest.home">
+    <exec dir="${src.docs.cn}" executable="${forrest.home}/bin/forrest" failonerror="true">
+      <env key="LANG" value="en_US.utf8"/>
+      <env key="JAVA_HOME" value="${java5.home}"/>
+    </exec>
+    <copy todir="${build.docs.cn}">
+      <fileset dir="${src.docs.cn}/build/site/" />
+    </copy>
+    <style basedir="${java.src.dir}" destdir="${build.docs.cn}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+  </target>
+
+  <target name="forrest.check" unless="forrest.home" depends="java5.check">
+    <fail message="'forrest.home' is not defined. Please pass 
+      -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
+  </target>
+
+  <target name="java5.check" unless="java5.home">
+    <fail message="'java5.home' is not defined.  Forrest requires Java 5.  
+       Please pass -Djava5.home=&lt;base of Java 5 distribution&gt; to Ant on the command-line." />
+  </target>
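
A minimal sketch of building the Forrest documentation, assuming local Apache Forrest and JDK 5 installations at placeholder paths:

    # Both paths are placeholders for local installations.
    ant docs -Dforrest.home=/opt/apache-forrest -Djava5.home=/usr/lib/jvm/java-1.5.0-sun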
+	
+  <target name="javadoc-dev" depends="compile, ivy-retrieve-javadoc" description="Generate javadoc for hadoop developers">
+    <mkdir dir="${build.javadoc.dev}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc.dev}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} Developer API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+	<packageset dir="src/contrib/failmon/src/java/"/> 
+
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <fileset dir="src/contrib/">
+            <include name="*/lib/*.jar" />
+          </fileset>
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+    	<group title="Core" packages="org.apache.*"/>
+        <group title="contrib: FailMon" packages="org.apache.hadoop.contrib.failmon*"/>
+
+    </javadoc>
+  </target>	
+
+  <target name="javadoc-uptodate" depends="compile, ivy-retrieve-javadoc">
+    <uptodate property="javadoc.is.uptodate">
+      <srcfiles dir="${src.dir}">
+        <include name="**/*.java" />
+        <include name="**/*.html" />
+      </srcfiles>
+      <mapper type="merge" to="${build.javadoc.timestamp}" />
+    </uptodate>
+  </target>
+ 
+  <target name="javadoc" description="Generate javadoc" depends="jar, javadoc-uptodate"
+       unless="javadoc.is.uptodate">
+    <mkdir dir="${build.javadoc}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+	<packageset dir="src/contrib/failmon/src/java/"/> 
+	
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <fileset dir="src/contrib/">
+            <include name="*/lib/*.jar" />
+          </fileset>
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+       <group title="Core" packages="org.apache.*"/>
+       <group title="contrib: FailMon" packages="org.apache.hadoop.contrib.failmon*"/>
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsStandardDoclet"
+               path="${build.dir}/${final.name}.jar"/>
+    </javadoc>
+  </target>	
+
+  <target name="api-xml" depends="ivy-retrieve-jdiff,javadoc,write-null">
+    <javadoc maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+               path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-apidir" value="${jdiff.xml.dir}"/>
+         <param name="-apiname" value="hadoop-core ${version}"/>
+         <param name="${jdiff.stability}"/>
+       </doclet>
+       <packageset dir="src/java"/>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath" />
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="write-null">
+	<exec executable="touch">
+	   <arg value="${jdiff.home}/Null.java"/>
+        </exec>
+  </target> 
+
+  <target name="api-report" depends="ivy-retrieve-jdiff,api-xml">
+    <mkdir dir="${jdiff.build.dir}"/>
+    <javadoc sourcepath="src/java"
+             destdir="${jdiff.build.dir}"
+	     sourceFiles="${jdiff.home}/Null.java"
+	     maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+              path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-oldapi" value="hadoop-core ${jdiff.stable}"/>
+         <param name="-newapi" value="hadoop-core ${version}"/>
+         <param name="-oldapidir" value="${jdiff.xml.dir}"/>
+         <param name="-newapidir" value="${jdiff.xml.dir}"/>
+         <param name="-javadocold" value="${jdiff.stable.javadoc}"/>
+         <param name="-javadocnew" value="../../api/"/>
+         <param name="-stats"/>
+         <param name="${jdiff.stability}"/>
+         <param name="${jdiff.compatibility}"/>
+       </doclet>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath"/>
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="changes-to-html" description="Convert CHANGES.txt into an html file">
+    <mkdir dir="${build.docs}"/>
+    <exec executable="perl" input="CHANGES.txt" output="${build.docs}/changes.html" failonerror="true">
+      <arg value="${changes.src}/changes2html.pl"/>
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${changes.src}" includes="*.css"/>
+    </copy>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- D I S T R I B U T I O N                                            -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="package" depends="compile, jar, javadoc, docs, api-report, jar-test"
+	  description="Build distribution">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/contrib"/>
+    <mkdir dir="${dist.dir}/bin"/>
+    <mkdir dir="${dist.dir}/docs"/>
+    <mkdir dir="${dist.dir}/docs/api"/>
+    <mkdir dir="${dist.dir}/docs/jdiff"/>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <subant target="package">
+      <!--Pass down the version in case it's needed again, and the target
+      distribution directory so contribs know where to install.-->
+      <property name="version" value="${version}"/>
+      <property name="dist.dir" value="${dist.dir}"/>
+      <fileset file="${contrib.dir}/build.xml"/>
+    </subant>  	
+
+    <copy todir="${dist.dir}"> 
+      <fileset file="${build.dir}/${final.name}.jar"/>
+      <fileset file="${build.dir}/${test.final.name}.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin"/>
+    </copy>
+
+    <copy todir="${dist.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy todir="${dist.dir}/docs">
+      <fileset dir="${build.docs}"/>
+    </copy>
+
+    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+
+    <copy todir="${dist.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${dist.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/src" includeEmptyDirs="true">
+      <fileset dir="src" excludes="**/*.template **/docs/build/**/*"/>
+    </copy>
+  	
+    <copy todir="${dist.dir}/" file="build.xml"/>
+
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+        <fileset dir="${dist.dir}/src/contrib/">
+          <include name="*/bin/*" />
+        </fileset>
+        <fileset dir="${dist.dir}/src/contrib/ec2/bin/image"/>
+    </chmod>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make release tarball                                               -->
+  <!-- ================================================================== -->
+  <target name="tar" depends="package" description="Make release tarball">
+    <macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/contrib/*/bin/*" />
+          <exclude name="${final.name}/src/contrib/ec2/bin/*" />
+          <exclude name="${final.name}/src/contrib/ec2/bin/image/*" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+          <include name="${final.name}/contrib/*/bin/*" />
+          <include name="${final.name}/src/contrib/ec2/bin/*" />
+          <include name="${final.name}/src/contrib/ec2/bin/image/*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
+
+  <target name="bin-package" depends="compile, jar, jar-test" 
+		description="assembles artifacts for binary target">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/contrib"/>
+    <mkdir dir="${dist.dir}/bin"/>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <subant target="package">
+      <!--Pass down the version in case it's needed again, and the target
+      distribution directory so contribs know where to install.-->
+      <property name="version" value="${version}"/>
+      <property name="dist.dir" value="${dist.dir}"/>
+      <fileset file="${contrib.dir}/build.xml"/>
+    </subant>  	
+
+    <copy todir="${dist.dir}"> 
+      <fileset file="${build.dir}/${final.name}.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin"/>
+    </copy>
+
+    <copy todir="${dist.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+
+    <copy todir="${dist.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${dist.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/" file="build.xml"/>
+
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+    </chmod>
+  </target>
+
+  <target name="binary" depends="bin-package" description="Make tarball without source and documentation">
+    <macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/src/**" />
+          <exclude name="${final.name}/docs/**" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
+  
+  <target name="ant-task-download" description="To download mvn-ant-task" unless="offline">
+    <get src="${ant_task_repo_url}" dest="${ant_task.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="mvn-taskdef" depends="ant-task-download">
+     <path id="mvn-ant-task.classpath" path="${ant_task.jar}"/> 
+     <typedef resource="org/apache/maven/artifact/ant/antlib.xml" 
+         uri="urn:maven-artifact-ant"
+         classpathref="mvn-ant-task.classpath"/>
+  </target>   
+
+  <target name="mvn-install" depends="mvn-taskdef,jar,jar-test,set-version"
+    description="Install hadoop common and test jars to local fs m2 repo">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:install file="${hadoop-common.jar}">
+        <pom refid="hadoop.core"/>
+	<attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:install>
+     <artifact:install file="${hadoop-common-test.jar}">
+        <pom refid="hadoop.core.test"/>
+	<attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:install>
+  </target>
+
+  <target name="mvn-si-install" depends="mvn-install,-mvn-system-install"
+     description="Install system integration test jars as well"/>
+
+  <target name="mvn-deploy" depends="mvn-taskdef, jar, jar-test,
+     jar-system, set-version, signanddeploy, simpledeploy"
+     description="To deploy hadoop common and test jar's to apache
+     snapshot's repository"/>
+
+  <target name="signanddeploy" if="staging" depends="sign">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+     <artifact:install-provider artifactId="wagon-http"
+     version="${wagon-http.version}"/>
+
+     <artifact:deploy file="${hadoop-common.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core"/>
+       <attach file="${hadoop-common.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.test"/>
+       <attach file="${hadoop-common-test.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-test.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-test-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-test-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.${herriot.suffix}"/>
+       <attach file="${hadoop-common-instrumented.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-instrumented.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}.asc" 
+         type="jar.asc" classifier="sources"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}"
+         classifier="sources"/>
+     </artifact:deploy>
+  </target>
+
+  <target name="sign" depends="clean-sign" if="staging">
+    <input message="password:>" addproperty="gpg.passphrase">
+     <handler classname="org.apache.tools.ant.input.SecureInputHandler" />
+    </input>
+    <macrodef name="sign-artifact" description="Signs the artifact">
+      <attribute name="input.file"/>
+      <attribute name="output.file" default="@{input.file}.asc"/>
+      <attribute name="gpg.passphrase"/>
+      <sequential>
+        <echo>Signing @{input.file} Sig File: @{output.file}</echo>
+        <exec executable="gpg" >
+          <arg value="--armor"/>
+          <arg value="--output"/>
+          <arg value="@{output.file}"/>
+          <arg value="--passphrase"/>
+          <arg value="@{gpg.passphrase}"/>
+          <arg value="--detach-sig"/>
+          <arg value="@{input.file}"/>
+        </exec>
+      </sequential>
+    </macrodef>
+    <sign-artifact input.file="${hadoop-common.jar}" 
+     output.file="${hadoop-common.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.jar}" 
+     output.file="${hadoop-common-test.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-sources.jar}" 
+     output.file="${hadoop-common-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test-sources.jar}" 
+     output.file="${hadoop-common-test-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common.pom}" 
+     output.file="${hadoop-common.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.pom}" 
+     output.file="${hadoop-common-test.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.jar}" 
+     output.file="${hadoop-common-instrumented.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.pom}" 
+     output.file="${hadoop-common-instrumented.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented-sources.jar}" 
+     output.file="${hadoop-common-instrumented-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+  </target>
+
+  <target name="simpledeploy" unless="staging">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+
+     <artifact:install-provider artifactId="wagon-http" version="${wagon-http.version}"/>
+     <artifact:deploy file="${hadoop-common.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core"/>
+	 <attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.test"/>
+	 <attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:deploy> 
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.${herriot.suffix}"/>
+         <attach file="${hadoop-common-instrumented-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+  </target>
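
A hedged sketch of the two deployment paths above; setting the staging property switches from the plain snapshot deploy to the signed staging deploy, which prompts for a GPG passphrase:

    ant mvn-deploy                  # unsigned deploy to the Apache snapshots repository
    ant mvn-deploy -Dstaging=true   # signed deploy to the staging repository (prompts for the GPG passphrase)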
+
+  <target name="set-version">
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-template.xml" tofile="${basedir}/ivy/hadoop-common.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-test-template.xml" tofile="${basedir}/ivy/hadoop-common-test.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-${herriot.suffix}-template.xml"
+      tofile="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <replaceregexp byline="true">
+      <regexp pattern="@version"/>
+      <substitution expression="${version}"/>
+      <fileset dir="${basedir}/ivy">
+        <include name="hadoop-common.xml"/>
+        <include name="hadoop-common-test.xml"/>
+        <include name="hadoop-common-${herriot.suffix}.xml"/>
+      </fileset>
+    </replaceregexp>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Perform audit activities for the release                           -->
+  <!-- ================================================================== -->
+  <target name="rats-taskdef" depends="ivy-retrieve-releaseaudit">
+     <typedef format="xml" resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks"
+      classpathref="releaseaudit-classpath"/>
+  </target>
+
+  <target name="releaseaudit" depends="package, rats-taskdef" description="Release Audit activities">
+   <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+      <fileset dir="${dist.dir}">
+        <exclude name="**/CHANGES.txt"/>
+        <exclude name="**/conf/*"/>
+        <exclude name="**/docs/"/>
+        <exclude name="lib/jdiff/"/>
+        <exclude name="**/native/*"/>
+        <exclude name="**/native/config/*"/>
+        <exclude name="**/VERSION"/>
+        <exclude name="**/*.json"/>
+        <exclude name="**/hod/*.txt"/>
+      </fileset>
+    </rat:report>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <target name="clean" depends="clean-contrib, clean-sign, clean-fi" description="Clean.  Delete the build files, and their directories">
+    <delete dir="${build.dir}"/>
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <delete dir="${docs.src}/build"/>
+    <delete dir="${src.docs.cn}/build"/>
+  </target>
+
+  <target name="clean-sign" description="Clean.  Delete .asc files">
+    <delete>
+      <fileset dir="." includes="**/**/*.asc"/>
+    </delete>
+  </target>  
+
+  <target name="veryclean" depends="clean" description="Delete mvn ant task jar and ivy ant taks jar">
+    <delete>
+      <fileset dir="${ivy.dir}" includes="*.jar"/>
+    </delete>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean contrib target. For now, must be called explicitly           -->
+  <!-- Using subant instead of ant as a workaround for 30569              -->
+  <!-- ================================================================== -->
+  <target name="clean-contrib">
+     <subant target="clean">        
+        <fileset file="src/contrib/build.xml"/>
+     </subant>  	
+  </target>
+	
+ <target name="clover" depends="clover.setup, clover.info" description="Instrument the Unit tests using Clover. 
+     To use, specify -Dclover.home=&lt;base of clover installation&gt; -Drun.clover=true on the command line."/>
+
+<target name="clover.setup" if="clover.enabled">
+   <taskdef resource="cloverlib.xml" classpath="${clover.jar}"/>
+   <mkdir dir="${clover.db.dir}"/>
+   <clover-setup initString="${clover.db.dir}/hadoop_coverage.db">
+     <fileset dir="${src.dir}" includes="java/**/*"/>
+     <testsources dir="${test.src.dir}"/>
+   </clover-setup>
+</target>
+
+<target name="clover.info" unless="clover.present">
+  <echo>
+     Clover not found. Code coverage reports disabled.
+  </echo>
+</target>
+
+<target name="clover.check">
+  <fail unless="clover.present">
+  ##################################################################
+   Clover not found.
+   Please specify -Dclover.home=&lt;base of clover installation&gt;
+   on the command line.
+  ##################################################################
+  </fail>
+</target>
+
+<target name="generate-clover-reports" depends="clover.check, clover">
+  <mkdir dir="${clover.report.dir}"/>
+  <clover-report>
+     <current outfile="${clover.report.dir}" title="${final.name}">
+     <format type="html"/>
+     </current>
+  </clover-report>
+  <clover-report>
+     <current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
+     <format type="xml"/>
+     </current>
+  </clover-report>
+</target>
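
A hedged example of a Clover-instrumented test run, assuming -Drun.clover=true wires up the clover.enabled and clover.present properties defined elsewhere in this build file and that the Clover jar is resolved from clover.home:

    # /opt/clover is a placeholder path to a local Clover installation.
    ant test generate-clover-reports -Dclover.home=/opt/clover -Drun.clover=true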
+
+<target name="findbugs.check" depends="check-for-findbugs" unless="findbugs.present">
+  <fail message="'findbugs.home' is not defined. Please pass -Dfindbugs.home=&lt;base of Findbugs installation&gt; to Ant on the command-line." />
+</target>
+
+<target name="patch.check" unless="patch.file">
+  <fail message="'patch.file' is not defined. Please pass -Dpatch.file=&lt;location of patch file&gt; to Ant on the command-line." />
+</target>
+
+<target name="test-patch" depends="patch.check,findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="DEVELOPER"/>
+    <arg value="${patch.file}"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${basedir}"/>
+    <arg value="${java5.home}"/>
+  </exec>
+</target>
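
A hedged sketch of a developer test-patch run; the patch path and tool locations below are placeholders, and the remaining arguments (scratch.dir, svn.cmd, grep.cmd, patch.cmd) are assumed to take their defaults from elsewhere in the build:

    # All paths are placeholders.
    ant test-patch -Dpatch.file=/tmp/my-change.patch \
        -Dfindbugs.home=/opt/findbugs -Dforrest.home=/opt/apache-forrest \
        -Djava5.home=/usr/lib/jvm/java-1.5.0-sun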
+
+<target name="hudson-test-patch" depends="findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="HUDSON"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${support.dir}"/>
+    <arg value="${ps.cmd}"/>
+    <arg value="${wget.cmd}"/>
+    <arg value="${jiracli.cmd}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${eclipse.home}"/>
+    <arg value="${python.home}"/>
+    <arg value="${basedir}"/>
+    <arg value="${jira.passwd}"/>
+    <arg value="${java5.home}"/>
+    <arg value="${curl.cmd}"/>
+    <arg value="${defect}"/>
+  </exec>
+</target>
+	
+  <condition property="ant-eclipse.jar.exists">
+    <available file="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar"/>
+  </condition>
+
+  <target name="ant-eclipse-download" unless="ant-eclipse.jar.exists"
+          description="Downloads the ant-eclipse binary.">
+    <get src="http://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2"
+         dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
+
+    <untar src="${build.dir}/ant-eclipse-1.0.bin.tar.bz2"
+           dest="${build.dir}" compression="bzip2">
+      <patternset>
+        <include name="lib/ant-eclipse-1.0-jvm1.2.jar"/>
+      </patternset>
+    </untar>
+    <delete file="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" />
+  </target>
+
+  <target name="eclipse" 
+          depends="init,ant-eclipse-download,ivy-retrieve-common,ivy-retrieve-test,compile-core-test"
+          description="Create eclipse project files">
+	     <pathconvert property="eclipse.project">
+	       <path path="${basedir}"/>
+	       <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/>
+	     </pathconvert>
+    <taskdef name="eclipse"
+             classname="prantl.ant.eclipse.EclipseTask"
+             classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
+    <eclipse updatealways="true">
+      <project name="${eclipse.project}" />
+      <classpath>
+        <source path="${java.src.dir}"
+                output="${build.dir.eclipse-main-classes}" />
+        <source path="${test.src.dir}/core"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.src.dir}/aop"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.generated.dir}"
+                output="${build.dir.eclipse-test-generated-classes}" />
+        <output path="${build.dir.eclipse-main-classes}" />
+        <library pathref="ivy-common.classpath" exported="true" />
+        <library pathref="ivy-test.classpath" exported="false" />
+        <variable path="ANT_HOME/lib/ant.jar" exported="false" />
+        <library path="${conf.dir}" exported="false" />
+      </classpath>
+    </eclipse>
+  </target>
+
+  <target name="ivy-init-dirs">
+    <mkdir dir="${build.ivy.dir}" />
+    <mkdir dir="${build.ivy.lib.dir}" />
+    <mkdir dir="${build.ivy.report.dir}" />
+    <mkdir dir="${build.ivy.maven.dir}" />
+  </target>
+
+  <target name="ivy-probe-antlib" >
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+    </condition>
+  </target>
+
+  <target name="ivy-download" description="To download ivy" unless="offline">
+    <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+  </target>
+
+  <!--
+  To avoid Ivy leaking things across big projects, always load Ivy in the same classloader.
+  Also note how we skip loading Ivy if it is already there, just to make sure all is well.
+  -->
+  <target name="ivy-init-antlib" depends="ivy-download,ivy-init-dirs,ivy-probe-antlib" unless="ivy.found">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
+      loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <fail >
+      <condition >
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy_repo_url}
+    </fail>
+  </target>
+
+  <property name="ivyresolvelog" value="download-only"/>
+  <property name="ivyretrievelog" value="quiet"/>
+
+  <target name="ivy-init" depends="ivy-init-antlib" >
+
+    <!--Configure Ivy by reading in the settings file.
+        If a settings file has already been read into this settings ID, that one takes priority.
+    -->
+    <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}" override='false'
+      realm="Sonatype Nexus Repository Manager"/>
+
+  </target>
+
+  <target name="ivy-resolve" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-javadoc" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="javadoc"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-releaseaudit" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="releaseaudit"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-test" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-common" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-jdiff" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="jdiff"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-checkstyle" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="checkstyle"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-retrieve" depends="ivy-resolve"
+    description="Retrieve Ivy-managed artifacts">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+  </target>
+
+  <target name="ivy-retrieve-checkstyle" depends="ivy-resolve-checkstyle"
+    description="Retrieve Ivy-managed artifacts for the checkstyle configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="checkstyle-classpath" conf="checkstyle"/>
+  </target>
+
+  <target name="ivy-retrieve-jdiff" depends="ivy-resolve-jdiff"
+    description="Retrieve Ivy-managed artifacts for the jdiff configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="jdiff-classpath" conf="jdiff"/>
+  </target>
+
+  <target name="ivy-retrieve-javadoc" depends="ivy-resolve-javadoc"
+    description="Retrieve Ivy-managed artifacts for the javadoc configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="javadoc-classpath" conf="javadoc"/>
+  </target>
+
+  <target name="ivy-retrieve-test" depends="ivy-resolve-test"
+    description="Retrieve Ivy-managed artifacts for the test configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-test.classpath" conf="test"/>
+  </target>
+
+  <target name="ivy-retrieve-common" depends="ivy-resolve-common"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-common.classpath" conf="common"/>
+  </target>
+
+  <target name="ivy-retrieve-releaseaudit" depends="ivy-resolve-releaseaudit"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="releaseaudit-classpath" conf="releaseaudit"/>
+  </target>
+
+  <target name="ivy-report" depends="ivy-resolve-releaseaudit"
+    description="Generate">
+    <ivy:report todir="${build.ivy.report.dir}" settingsRef="${ant.project.name}.ivy.settings"/>
+    <echo>
+      Reports generated:${build.ivy.report.dir}
+    </echo>
+  </target>
+
+</project>
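
Taken together, a hedged sketch of the most common entry points into this build file; test class names are placeholders, and network access is assumed for the Ivy and Maven ant-task downloads:

    ant jar                                  # compile and package the hadoop-common jar
    ant test-core -Dtestcase=TestSomeClass   # run a single core unit test
    ant test                                 # run core, fault-injection, and contrib tests
    ant tar                                  # build the release tarball
    ant mvn-install                          # install the common and test jars into the local m2 repository
    ant eclipse                              # generate Eclipse project files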

+ 24 - 0
common/conf/configuration.xsl

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>

+ 8 - 0
common/conf/core-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 54 - 0
common/conf/hadoop-env.sh.template

@@ -0,0 +1,54 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.6-sun
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HADOOP_CLASSPATH="<extra_entries>:$HADOOP_CLASSPATH"
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# if [ "$HADOOP_OPTS" == "" ]; then export HADOOP_OPTS=-server; else HADOOP_OPTS+=" -server"; fi
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+# export HADOOP_TASKTRACKER_OPTS=
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options.  Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/var/hadoop/pids
+
+# A string representing this instance of hadoop. $USER by default.
+# export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HADOOP_NICENESS=10

+ 72 - 0
common/conf/hadoop-metrics.properties

@@ -0,0 +1,72 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+#jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.period=10
+# rpc.servers=localhost:8649
+
+
+# Configuration of the "ugi" context for null
+ugi.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "ugi" context for file
+#ugi.class=org.apache.hadoop.metrics.file.FileContext
+#ugi.period=10
+#ugi.fileName=/tmp/ugimetrics.log
+
+# Configuration of the "ugi" context for ganglia
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# ugi.period=10
+# ugi.servers=localhost:8649
+

+ 16 - 0
common/conf/hadoop-metrics2.properties.example

@@ -0,0 +1,16 @@
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+
+#namenode.sink.file.filename=namenode-metrics.out
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+#jobtracker.sink.file.filename=jobtracker-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out

+ 106 - 0
common/conf/hadoop-policy.xml.template

@@ -0,0 +1,106 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code 
+    via the DistributedFileSystem. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol 
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to 
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.tracker.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterTrackerProtocol, used by the tasktrackers to 
+    communicate with the jobtracker.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.submission.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for JobSubmissionProtocol, used by job clients to 
+    communicate with the jobtracker for job submission, querying job status, etc.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.task.umbilical.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce 
+    tasks to communicate with the parent tasktracker. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the 
+    dfsadmin and mradmin commands to refresh the security policy in-effect. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AdminOperationsProtocol, used by the mradmin command
+    to refresh queues and nodes at JobTracker. The ACL is a comma-separated list of 
+    user and group names. The user and group list is separated by a blank. 
+    For example, "alice,bob users,wheel". A special value of "*" means all users are 
+    allowed.</description>
+  </property>
+</configuration>

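Every ACL above shares one value format: a comma-separated list of users, a blank, then a comma-separated list of groups, with "*" opening the protocol to everyone. After tightening a value (for example, restricting security.client.protocol.acl to "alice,bob users,wheel"), the descriptions note that the dfsadmin and mradmin commands can push the new policy to running daemons; a sketch, assuming service-level authorization is enabled in core-site.xml:

  # Sketch: reload conf/hadoop-policy.xml on the running daemons
  # (assumes hadoop.security.authorization=true in core-site.xml)
  bin/hadoop dfsadmin -refreshServiceAcl
  bin/hadoop mradmin -refreshServiceAcl
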
+ 149 - 0
common/conf/log4j.properties

@@ -0,0 +1,149 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+#
+# Job Summary Appender 
+#
+# Use the following logger to send job summaries to a separate file, defined by
+# hadoop.mapreduce.jobsummary.log.file and rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Roll over at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+#Security appender
+#
+hadoop.security.log.file=SecurityAuth.audit
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Security audit logger
+log4j.category.SecurityLogger=INFO,DRFAS
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# Audit events are emitted at INFO level; the WARN setting below suppresses them
+#
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.JSA.DatePattern=.yyyy-MM-dd
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# MapReduce Audit Log Appender
+#
+
+# Set the MapReduce audit log filename
+#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log
+
+# Appender for AuditLogger.
+# Requires the following system properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - hadoop.mapreduce.audit.log.file (MapReduce audit log filename)
+
+#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT
+#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file}
+#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd
+#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

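Most of the appenders above are selected through the hadoop.* logger properties rather than hard-wired, so a daemon can be switched to a different appender at start time without editing this file. A sketch, assuming the stock bin/ scripts translate HADOOP_ROOT_LOGGER into the -Dhadoop.root.logger system property (the daemon name is illustrative):

  # Sketch: log to the daily-rolling DRFA appender defined above instead of the console
  export HADOOP_ROOT_LOGGER="INFO,DRFA"
  bin/hadoop-daemon.sh start namenode
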
+ 1 - 0
common/conf/masters.template

@@ -0,0 +1 @@
+localhost

+ 1 - 0
common/conf/slaves.template

@@ -0,0 +1 @@
+localhost

+ 57 - 0
common/conf/ssl-client.xml.example

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

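Only the truststore and keystore locations above are mandatory; everything else has a default, and both stores default to the JKS type. A sketch of producing matching files with the JDK keytool (aliases, paths, and the certificate file name are illustrative):

  # Sketch: create a client keystore, then import the cluster's certificate into a
  # truststore; point ssl.client.keystore.location / ssl.client.truststore.location at them
  keytool -genkey -alias hadoop-client -keystore client-keystore.jks -storetype jks
  keytool -import -alias hadoop-server -file server.crt -keystore client-truststore.jks -storetype jks
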
+ 55 - 0
common/conf/ssl-server.xml.example

@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 261 - 0
common/ivy.xml

@@ -0,0 +1,261 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}" revision="${version}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Hadoop Common
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact"
+      extends="client,server,s3-server,kfs,mandatory,jetty,ftp"/>
+
+    <conf name="mandatory" description="contains the critical  dependencies"
+      extends="commons-logging,log4j"/>
+
+    <!--
+    These public configurations contain the core dependencies for running hadoop client or server.
+    The server is effectively a superset of the client.
+    -->
+    <conf name="client" description="client-side dependencies"
+      extends="mandatory,httpclient"/>
+    <conf name="server" description="server-side dependencies"
+      extends="client"/>
+    <conf name="s3-client" description="dependencies for working with S3/EC2 infrastructure"
+      extends="client"/>
+    <conf name="s3-server" description="dependencies for running on S3/EC2 infrastructure"
+      extends="s3-client,server"/>
+    <conf name="kfs" description="dependencies for KFS file system support"/>
+    <conf name="ftp" description="dependencies for workign with FTP filesytems" 
+              extends="mandatory"/>
+    <conf name="jetty" description="Jetty provides the in-VM HTTP daemon" extends="commons-logging"/>
+
+    <conf name="common" extends="runtime,mandatory,httpclient,ftp,jetty,jdiff"
+                        description="common artifacts"/>
+    <!--Testing pulls in everything-->
+   <conf name="test" extends="master" description="the classpath needed to run tests"/>
+
+    <!--Private configurations. -->
+
+    <conf name="javadoc" visibility="private" description="artiracts required while performing doc generation"
+      extends="common,mandatory,jetty,lucene"/>
+
+    <conf name="releaseaudit" visibility="private"
+	description="Artifacts required for releaseaudit target"/>
+     
+    <conf name="commons-logging" visibility="private"/>
+    <conf name="httpclient" visibility="private" extends="commons-logging"/>
+    <conf name="log4j" visibility="private"/>
+    <conf name="lucene" visibility="private"/>
+    <conf name="jdiff" visibility="private" extends="log4j,s3-client,jetty,server"/>
+    <conf name="checkstyle" visibility="private"/>
+
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+
+ <!--used client side-->
+    <dependency org="commons-cli"
+      name="commons-cli"
+      rev="${commons-cli.version}"
+      conf="client->default"/>
+    <dependency org="checkstyle"
+      name="checkstyle"
+      rev="${checkstyle.version}"
+      conf="checkstyle->default"/>
+    <dependency org="jdiff"
+      name="jdiff"
+      rev="${jdiff.version}"
+      conf="jdiff->default"/>
+
+    <dependency org="xmlenc"
+      name="xmlenc"
+      rev="${xmlenc.version}"
+      conf="server->default"/>
+
+    <!--Configuration: httpclient-->
+
+    <dependency org="commons-codec"
+      name="commons-codec"
+      rev="${commons-codec.version}"
+      conf="httpclient->default"/>
+
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="ftp->default"/>
+
+    <!--Configuration: Jetty -->
+    <dependency org="org.mortbay.jetty"
+      name="jetty"
+      rev="${jetty.version}"
+      conf="jetty->default">
+      <exclude module="ant"/>
+    </dependency>
+
+    <dependency org="tomcat"
+      name="jasper-runtime"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="tomcat"
+      name="jasper-compiler"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-2.1-jetty"
+      rev="${jetty.version}"
+      conf="jetty->default"/>
+    <dependency org="commons-el"
+      name="commons-el"
+      rev="${commons-el.version}"
+      conf="jetty->master"/>
+
+
+    <!--Configuration: commons-logging -->
+
+    <!--it is essential that only the master JAR of commons logging
+    is pulled in, as its dependencies are usually a mess, including things
+    like out of date servlet APIs, bits of Avalon, etc.
+    -->
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="commons-logging->master"/>
+
+
+    <!--Configuration: log4j -->
+
+    <!--log4J is not optional until commons-logging.properties is stripped out of the JAR -->
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="log4j->master"/>
+
+    <!--Configuration: s3-client -->
+    <!--there are two jets3t projects in the repository; this one goes up to 0.6 and
+    is assumed to be the live one-->
+    <dependency org="net.java.dev.jets3t"
+      name="jets3t"
+      rev="${jets3t.version}"
+      conf="s3-client->default"/>
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="s3-client->master"/> 
+    <dependency org="net.sf.kosmosfs"
+      name="kfs"
+      rev="${kfs.version}"
+      conf="kfs->default"/>
+
+    <!--Configuration: test -->
+    <!--artifacts needed for testing -->
+
+    <dependency org="junit"
+      name="junit"
+      rev="${junit.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.rat"
+      name="apache-rat-tasks"
+      rev="${rats-lib.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-lang"
+      name="commons-lang"
+      rev="${commons-lang.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-collections"
+      name="commons-collections"
+      rev="${commons-collections.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="hsqldb"
+      name="hsqldb"
+      rev="${hsqldb.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.lucene"
+      name="lucene-core"
+      rev="${lucene-core.version}"
+      conf="javadoc->default"/> 
+    <dependency org="commons-logging"
+      name="commons-logging-api"
+      rev="${commons-logging-api.version}"
+      conf="common->default"/>
+    <dependency org="org.slf4j"
+      name="slf4j-api"
+      rev="${slf4j-api.version}"
+      conf="common->default"/>
+    <dependency org="org.eclipse.jdt"
+      name="core"
+      rev="${core.version}"
+      conf="common->master"/>
+    <dependency org="org.slf4j"
+      name="slf4j-log4j12"
+      rev="${slf4j-log4j12.version}"
+      conf="common->master">
+    </dependency>
+    <dependency org="org.apache.avro"
+      name="avro"
+      rev="${avro.version}"
+      conf="common->default">
+      <exclude module="ant"/>
+      <exclude module="jetty"/>
+      <exclude module="slf4j-simple"/>
+      <exclude module="velocity"/>
+      <exclude module="netty" />
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjrt"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjtools"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.mockito" 
+      name="mockito-all" 
+      rev="${mockito-all.version}" 
+      conf="test->default">
+    </dependency> 
+    <dependency org="com.jcraft"
+      name="jsch"
+      rev="${jsch.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
+    <dependency org="com.google.guava"
+      name="guava"
+      rev="${guava.version}"
+      conf="common->default"/>
+  </dependencies>
+</ivy-module>

+ 42 - 0
common/ivy/hadoop-common-instrumented-template.xml

@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-instrumented</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>org.aspectj</groupId>
+      <artifactId>aspectjrt</artifactId>
+      <version>1.6.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.aspectj</groupId>
+      <artifactId>aspectjtools</artifactId>
+      <version>1.6.5</version>
+    </dependency>
+  </dependencies>
+</project>

+ 151 - 0
common/ivy/hadoop-common-template.xml

@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.2</version>
+    </dependency>
+    <dependency>
+      <groupId>xmlenc</groupId>
+      <artifactId>xmlenc</artifactId>
+      <version>0.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.1.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.6.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>1.6.1</version>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>1.2.16</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.26</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-runtime</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-compiler</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1-jetty</artifactId>
+      <version>6.1.26</version>
+      <exclusions>
+        <exclusion>
+          <groupId>ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>commons-el</groupId>
+      <artifactId>commons-el</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.java.dev.jets3t</groupId>
+      <artifactId>jets3t</artifactId>
+      <version>0.7.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.kosmosfs</groupId>
+      <artifactId>kfs</artifactId>
+      <version>0.3</version>
+    </dependency>
+    <dependency>
+      <groupId>hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>1.8.0.10</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.4.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.tools.ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.velocity</groupId>
+          <artifactId>velocity</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>commons-configuration</groupId>
+      <artifactId>commons-configuration</artifactId>
+      <version>1.6</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math</artifactId>
+      <version>2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>r07</version>
+    </dependency>
+  </dependencies>
+</project>

+ 43 - 0
common/ivy/hadoop-common-test-template.xml

@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-test</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.8.5</version>
+    </dependency>
+  </dependencies>
+</project>

+ 50 - 0
common/ivy/ivysettings.xml

@@ -0,0 +1,50 @@
+<ivysettings>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+  <property name="repo.maven.org" value="http://repo1.maven.org/maven2/" override="false"/>
+
+  <property name="maven2.pattern" value="[organisation]/[module]/[revision]/[module]-[revision]"/>
+  <property name="repo.dir" value="${user.home}/.m2/repository"/>
+  <!-- pull in the local repository -->
+  <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
+
+  <property name="resolvers" value="default" override="false"/>
+  <property name="force-resolve" value="false" override="false"/>
+  <settings defaultResolver="${resolvers}"/>
+
+  <resolvers>
+    <!--ibiblio resolvers-->
+    <ibiblio name="maven2" root="${repo.maven.org}" m2compatible="true"/>
+
+    <filesystem name="fs" m2compatible="true" force="${force-resolve}">
+       <artifact pattern="${repo.dir}/${maven2.pattern}.[ext]"/>
+       <ivy pattern="${repo.dir}/${maven2.pattern}.pom"/>
+    </filesystem>
+
+    <chain name="default" dual="true">
+      <resolver ref="maven2"/>
+    </chain>
+
+    <chain name="internal" dual="true">
+      <resolver ref="fs"/>
+      <resolver ref="maven2"/>
+    </chain>
+
+  </resolvers>
+
+</ivysettings>

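The two chains above encode two strategies: "default" resolves straight from the central Maven repository, while "internal" first checks the local ~/.m2 repository named by repo.dir. Because the active chain comes from the overridable resolvers property, a build can prefer locally installed artifacts without editing this file; a sketch (the target name is illustrative):

  # Sketch: resolve against the local ~/.m2 repository before falling back to central
  ant -Dresolvers=internal jar
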
+ 62 - 0
common/ivy/libraries.properties

@@ -0,0 +1,62 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by hadoop and components.
+#It drives ivy and the generation of a maven POM
+
+#These are the versions of our dependencies (in alphabetical order)
+ant-task.version=2.0.10
+aspectj.version=1.6.5
+avro.version=1.4.1
+
+checkstyle.version=4.2
+commons-cli.version=1.2
+commons-codec.version=1.4
+commons-collections.version=3.1
+commons-configuration.version=1.6
+commons-lang.version=2.5
+commons-logging.version=1.1.1
+commons-logging-api.version=1.1
+commons-el.version=1.0
+commons-fileupload.version=1.2
+commons-io.version=1.4
+commons-math.version=2.1
+commons-net.version=1.4.1
+core.version=3.1.1
+
+guava.version=r07
+
+hsqldb.version=1.8.0.10
+
+ivy.version=2.2.0
+
+jasper.version=5.5.12
+jets3t.version=0.7.1
+jetty.version=6.1.26
+junit.version=4.8.1
+jdiff.version=1.0.9
+jsch.version=0.1.42
+
+kfs.version=0.3
+
+log4j.version=1.2.16
+lucene-core.version=2.3.1
+
+mockito-all.version=1.8.5
+
+rats-lib.version=0.6
+
+slf4j-api.version=1.6.1
+slf4j-log4j12.version=1.6.1
+
+xmlenc.version=0.52
+xerces.version=1.4.4

Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop-core_0.20.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop-core_0.21.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.17.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.2.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.3.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.2.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.2.xml


+ 11 - 0
common/src/contrib/bash-tab-completion/README

@@ -0,0 +1,11 @@
+Bash tab completion support for the hadoop script.
+
+On Debian-like distributions, the script can be placed in
+/etc/bash_completion.d/, and it will be sourced automatically by Bash. On
+other distributions, you may source the file manually (`. hadoop.sh') or
+source it from your bashrc (or equivalent) file.
+
+The script allows tab completion of all the command names, subcommands for the
+'fs', 'dfsadmin', 'job', 'namenode' and 'pipes' commands, arguments of the 'jar'
+command and most arguments to the 'fs' subcommands (completing local and 
+dfs paths as appropriate).

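A quick way to check that completion is working, assuming the hadoop script is on the PATH:

  # Sketch: load the completion function into the current shell, then try it
  . /etc/bash_completion.d/hadoop.sh
  # typing:  hadoop fs -copy<TAB>
  # should offer completions such as -copyFromLocal and -copyToLocal
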
+ 121 - 0
common/src/contrib/bash-tab-completion/hadoop.sh

@@ -0,0 +1,121 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Provides tab completion for the main hadoop script.
+#
+# On debian-based systems, place in /etc/bash_completion.d/ and either restart
+# Bash or source the script manually (. /etc/bash_completion.d/hadoop.sh).
+
+_hadoop() {
+  local script cur prev temp
+
+  COMPREPLY=()
+  cur=${COMP_WORDS[COMP_CWORD]}
+  prev=${COMP_WORDS[COMP_CWORD-1]}  
+  script=${COMP_WORDS[0]}  
+  
+  # Bash lets you tab complete things even if the script doesn't
+  # exist (or isn't executable). Check to make sure it is, as we
+  # need to execute it to get options/info
+  if [ -f "$script" -a -x "$script" ]; then
+    case $COMP_CWORD in
+    1)
+      # Completing the first argument (the command).
+
+      temp=`$script | grep -n "^\s*or"`;
+      temp=`$script | head -n $((${temp%%:*} - 1)) | awk '/^ / {print $1}' | sort | uniq`;
+      COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+      return 0;;
+
+    2)
+      # Completing the second arg (first arg to the command)
+
+      # The output of commands isn't hugely consistent, so certain
+      # names are hardcoded and parsed differently. Some aren't
+      # handled at all (mostly ones without args).
+      case ${COMP_WORDS[1]} in
+      dfs | dfsadmin | fs | job | pipes)
+        # One option per line, enclosed in square brackets
+
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | awk '/^[ \t]*\[/ {gsub("[[\\]]", ""); print $1}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      jar)
+        # Any (jar) file
+
+        COMPREPLY=(`compgen -A file -- ${cur}`);
+        return 0;;
+
+      namenode)
+        # All options specified in one line,
+        # enclosed in [] and separated with |
+        temp=`$script ${COMP_WORDS[1]} -help 2>&1 | grep Usage: | cut -d '[' -f 2- | awk '{gsub("] \\| \\[|]", " "); print $0}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      *)
+        # Other commands - no idea
+
+        return 1;;
+      esac;;
+
+    *)
+      # Additional args
+      
+      case ${COMP_WORDS[1]} in
+      dfs | fs)
+        # DFS/FS subcommand completion
+        # Pull the list of options, grep for the one the user is trying to use,
+        # and then select the description of the relevant argument
+        temp=$((${COMP_CWORD} - 1));
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | grep -- "${COMP_WORDS[2]} " | awk '{gsub("[[ \\]]", ""); print $0}' | cut -d '<' -f ${temp}`;
+
+        if [ ${#temp} -lt 1 ]; then
+          # No match
+          return 1;
+        fi;
+
+        temp=${temp:0:$((${#temp} - 1))};
+
+        # Now do completion based on the argument
+        case $temp in
+        path | src | dst)
+          # DFS path completion
+          temp=`$script ${COMP_WORDS[1]} -ls "${cur}*" 2>&1 | grep -vE '^Found ' | cut -f 1 | awk '{gsub("^.* ", ""); print $0;}'`
+          COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+          return 0;;
+
+        localsrc | localdst)
+          # Local path completion
+          COMPREPLY=(`compgen -A file -- ${cur}`);
+          return 0;;
+
+        *)
+          # Other arguments - no idea
+          return 1;;
+        esac;;
+
+      *)
+        # Other subcommands - no idea
+        return 1;;
+      esac;
+    esac;
+  fi;
+}
+
+complete -F _hadoop hadoop

+ 305 - 0
common/src/contrib/build-contrib.xml

@@ -0,0 +1,305 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- Imported by contrib/*/build.xml files to share generic targets. -->
+
+<project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
+
+  <property name="name" value="${ant.project.name}"/>
+  <dirname property="src.contrib.dir" file="${ant.file.hadoopbuildcontrib}" />
+  <property name="root" value="${basedir}"/>
+
+  <!-- Load all the default properties, and any the user wants    -->
+  <!-- to contribute (without having to type -D or edit this file -->
+  <property file="${user.home}/${name}.build.properties" />
+  <property file="${root}/build.properties" />
+
+  <property name="hadoop.root" location="${src.contrib.dir}/../../"/>
+  <property name="src.dir"  location="${root}/src/java"/>
+  <property name="src.test" location="${root}/src/test"/>
+  <property name="src.examples" location="${root}/src/examples"/>
+
+  <available file="${src.examples}" type="dir" property="examples.available"/>
+  <available file="${src.test}" type="dir" property="test.available"/>
+
+  <property name="conf.dir" location="${hadoop.root}/conf"/>
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="test.output" value="no"/>
+  <property name="test.timeout" value="900000"/>
+  <property name="build.dir" location="${hadoop.root}/build/contrib/${name}"/>
+  <property name="build.classes" location="${build.dir}/classes"/>
+  <property name="build.test" location="${build.dir}/test"/>
+  <property name="build.examples" location="${build.dir}/examples"/>
+  <property name="hadoop.log.dir" location="${build.dir}/test/logs"/>
+  <!-- all jars together -->
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.debug" value="on"/>
+  <property name="build.ivy.lib.dir" value="${hadoop.root}/build/ivy/lib"/> 
+
+  <property name="javadoc.link"
+            value="http://java.sun.com/j2se/1.4/docs/api/"/>
+
+  <property name="build.encoding" value="ISO-8859-1"/>
+
+  <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
+
+
+   <!-- IVY properties set here -->
+  <property name="ivy.dir" location="ivy" />
+  <property name="ivysettings.xml" location="${hadoop.root}/ivy/ivysettings.xml"/>
+  <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <loadproperties srcfile="${hadoop.root}/ivy/libraries.properties"/>
+  <property name="ivy.jar" location="${hadoop.root}/ivy/ivy-${ivy.version}.jar"/>
+  <property name="ivy_repo_url" 
+	value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
+  <property name="build.dir" location="build" />
+  <property name="build.ivy.dir" location="${build.dir}/ivy" />
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
+  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/> 
+
+  <!--this is the naming policy for artifacts we want pulled down-->
+  <property name="ivy.artifact.retrieve.pattern"
+    			value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+
+  <!-- the normal classpath -->
+  <path id="contrib-classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset refid="lib.jars"/>
+    <pathelement location="${hadoop.root}/build/classes"/>
+    <fileset dir="${hadoop.root}/lib">
+      <include name="**/*.jar" />
+    </fileset>
+    <path refid="${ant.project.name}.common-classpath"/>
+    <pathelement path="${clover.jar}"/>
+  </path>
+
+  <!-- the unit test classpath -->
+  <path id="test.classpath">
+    <pathelement location="${build.test}" />
+    <pathelement location="${hadoop.root}/build/test/classes"/>
+    <pathelement location="${hadoop.root}/build/test/core/classes"/>
+    <pathelement location="${hadoop.root}/build/test/hdfs/classes"/>
+    <pathelement location="${hadoop.root}/build/test/mapred/classes"/>
+    <pathelement location="${hadoop.root}/src/contrib/test"/>
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${hadoop.root}/build"/>
+    <pathelement location="${build.examples}"/>
+    <path refid="contrib-classpath"/>
+  </path>
+
+
+  <!-- to be overridden by sub-projects -->
+  <target name="check-contrib"/>
+  <target name="init-contrib"/>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init" depends="check-contrib" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.test}"/>
+    <mkdir dir="${build.examples}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <antcall target="init-contrib"/>
+  </target>
+
+
+  <!-- ====================================================== -->
+  <!-- Compile a Hadoop contrib's files                       -->
+  <!-- ====================================================== -->
+  <target name="compile" depends="init, ivy-retrieve-common" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.dir}"
+     includes="**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     deprecation="${javac.deprecation}">
+     <classpath refid="contrib-classpath"/>
+    </javac>
+  </target>
+
+
+  <!-- ======================================================= -->
+  <!-- Compile a Hadoop contrib's example files (if available) -->
+  <!-- ======================================================= -->
+  <target name="compile-examples" depends="compile" if="examples.available">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.examples}"
+     includes="**/*.java"
+     destdir="${build.examples}"
+     debug="${javac.debug}">
+     <classpath refid="contrib-classpath"/>
+    </javac>
+  </target>
+
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  -->
+  <!-- ================================================================== -->
+  <target name="compile-test" depends="compile-examples" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.test}"
+     includes="**/*.java"
+     destdir="${build.test}"
+     debug="${javac.debug}">
+    <classpath refid="test.classpath"/>
+    </javac>
+  </target>
+  
+
+  <!-- ====================================================== -->
+  <!-- Make a Hadoop contrib's jar                            -->
+  <!-- ====================================================== -->
+  <target name="jar" depends="compile" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <jar
+      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
+      basedir="${build.classes}"      
+    />
+  </target>
+
+  
+  <!-- ====================================================== -->
+  <!-- Make a Hadoop contrib's examples jar                   -->
+  <!-- ====================================================== -->
+  <target name="jar-examples" depends="compile-examples"
+          if="examples.available" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <jar jarfile="${build.dir}/hadoop-${version}-${name}-examples.jar">
+      <fileset dir="${build.classes}">
+      </fileset>
+      <fileset dir="${build.examples}">
+      </fileset>
+    </jar>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Package a Hadoop contrib                               -->
+  <!-- ====================================================== -->
+  <target name="package" depends="jar, jar-examples" unless="skip.contrib"> 
+    <mkdir dir="${dist.dir}/contrib/${name}"/>
+    <copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${build.dir}">
+        <include name="hadoop-${version}-${name}.jar" />
+      </fileset>
+    </copy>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     -->
+  <!-- ================================================================== -->
+  <target name="test" depends="compile-test, compile" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${hadoop.log.dir}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <junit
+      printsummary="yes" showoutput="${test.output}" 
+      haltonfailure="no" fork="yes" maxmemory="256m"
+      errorProperty="tests.failed" failureProperty="tests.failed"
+      timeout="${test.timeout}">
+      
+      <sysproperty key="test.build.data" value="${build.test}/data"/>
+      <sysproperty key="build.test" value="${build.test}"/>
+      <sysproperty key="contrib.name" value="${name}"/>
+      
+      <!-- requires fork=yes for: 
+        relative File paths to use the specified user.dir 
+        classpath to use build/contrib/*.jar
+      -->
+      <sysproperty key="user.dir" value="${build.test}/data"/>
+      
+      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
+      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
+      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/> 
+      <sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
+      <sysproperty key="taskcontroller-user" value="${taskcontroller-user}"/>
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+    <fail if="tests.failed">Tests failed!</fail>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <target name="clean">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${build.dir}"/>
+  </target>
+
+  <target name="ivy-probe-antlib" >
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+    </condition>
+  </target>
+
+
+  <target name="ivy-download" description="To download ivy " unless="offline">
+    <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="ivy-init-antlib" depends="ivy-download,ivy-probe-antlib" unless="ivy.found">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
+      loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <fail >
+      <condition >
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy_repo_url}
+    </fail>
+  </target>
+
+  <target name="ivy-init" depends="ivy-init-antlib">
+    <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}"/>
+  </target>
+
+  <target name="ivy-resolve-common" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common" />
+  </target>
+
+  <target name="ivy-retrieve-common" depends="ivy-resolve-common"
+    description="Retrieve Ivy-managed artifacts for the compile/test configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings" 
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
+    <ivy:cachepath pathid="${ant.project.name}.common-classpath" conf="common" />
+  </target>
+</project>

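The test target above honours a testcase property (the second batchtest element), which limits a run to a single class; test.timeout and test.output tune the JUnit run in the same way. A sketch of running one test in one contrib module (module and class names are illustrative):

  # Sketch: each contrib build.xml imports build-contrib.xml, so the "test"
  # target above is available from the module directory
  cd common/src/contrib/failmon
  ant test -Dtestcase=TestSomething
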
+ 64 - 0
common/src/contrib/build.xml

@@ -0,0 +1,64 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="hadoopcontrib" default="compile" basedir=".">
+  
+  <!-- If one of the contrib subdirectories breaks the build or test -->
+  <!-- targets and you cannot fix it, exclude it from the fileset below, -->
+  <!-- e.g. excludes="badcontrib/build.xml" -->
+
+  <!-- ====================================================== -->
+  <!-- Compile contribs.                                      -->
+  <!-- ====================================================== -->
+  <target name="compile">
+    <subant target="compile">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Package contrib jars.                                  -->
+  <!-- ====================================================== -->
+  <target name="package">
+    <subant target="package">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Test all the contribs.                               -->
+  <!-- ====================================================== -->
+  <target name="test">
+    <subant target="test">
+      <fileset dir="." includes="failmon/build.xml"/>
+      <fileset dir="." includes="hod/build.xml"/>
+    </subant>
+  </target>
+  
+  
+  <!-- ====================================================== -->
+  <!-- Clean all the contribs.                              -->
+  <!-- ====================================================== -->
+  <target name="clean">
+    <subant target="clean">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+
+</project>

+ 15 - 0
common/src/contrib/ec2/README.txt

@@ -0,0 +1,15 @@
+Hadoop EC2
+
+NOTE: these scripts have been deprecated. See http://incubator.apache.org/whirr.
+
+This collection of scripts allows you to run Hadoop clusters on Amazon.com's Elastic Compute Cloud (EC2) service described at:
+
+  http://aws.amazon.com/ec2
+  
+To get help, type the following in a shell:
+  
+  bin/hadoop-ec2
+
+For full instructions, please visit the Hadoop wiki at:
+
+  http://wiki.apache.org/hadoop/AmazonEC2#AutomatedScripts

+ 71 - 0
common/src/contrib/ec2/bin/cmd-hadoop-cluster

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run commands on master or specified node of a running Hadoop EC2 cluster.
+
+set -o errexit
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Command required!"
+  exit 1
+fi
+
+# get arguments
+COMMAND="$1"
+shift
+# get group
+CLUSTER="$1"
+shift
+
+if [ -z $CLUSTER ]; then
+  echo "Cluster name or instance id required!"
+  exit -1
+fi
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [[ $CLUSTER == i-* ]]; then
+  HOST=`ec2-describe-instances $CLUSTER | grep running | awk '{print $4}'`
+  [ -z $HOST ] && echo "Instance still pending or no longer running: $CLUSTER" && exit -1
+else
+  [ ! -f $MASTER_IP_PATH ] && echo "Wrong group name, or cluster not launched! $CLUSTER" && exit -1
+  HOST=`cat $MASTER_IP_PATH`
+fi
+
+if [ "$COMMAND" = "login" ] ; then
+  echo "Logging in to host $HOST."
+  ssh $SSH_OPTS "root@$HOST"
+elif [ "$COMMAND" = "proxy" ] ; then
+  echo "Proxying to host $HOST via local port 6666"
+  echo "Gangia:     http://$HOST/ganglia"
+  echo "JobTracker: http://$HOST:50030/"
+  echo "NameNode:   http://$HOST:50070/"
+  ssh $SSH_OPTS -D 6666 -N "root@$HOST"
+elif [ "$COMMAND" = "push" ] ; then
+  echo "Pushing $1 to host $HOST."
+  scp $SSH_OPTS -r $1 "root@$HOST:"
+elif [ "$COMMAND" = "screen" ] ; then
+  echo "Logging in and attaching screen on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" 'screen -D -R'
+else
+  echo "Executing command on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" "$COMMAND"
+fi

+ 80 - 0
common/src/contrib/ec2/bin/create-hadoop-image

@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Create a Hadoop AMI.
+# Inspired by Jonathan Siegel's EC2 script (http://blogsiegel.blogspot.com/2006/08/sandboxing-amazon-ec2.html)
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+[ ! -z $AMI_IMAGE ] && echo "AMI already registered, use: ec2-deregister $AMI_IMAGE" && exit -1
+
+echo "Starting a AMI with ID $BASE_AMI_IMAGE."
+OUTPUT=`ec2-run-instances $BASE_AMI_IMAGE -k $KEY_NAME -t $INSTANCE_TYPE`
+BOOTING_INSTANCE=`echo $OUTPUT | awk '{print $6}'`
+
+echo "Instance is $BOOTING_INSTANCE."
+
+echo "Polling server status (ec2-describe-instances $BOOTING_INSTANCE)"
+while true; do
+  printf "."
+  HOSTNAME=`ec2-describe-instances $BOOTING_INSTANCE | grep running | awk '{print $4}'`
+  if [ ! -z $HOSTNAME ]; then
+    break;
+  fi
+  sleep 1
+done
+
+echo "The server is available at $HOSTNAME."
+while true; do
+  REPLY=`ssh $SSH_OPTS "root@$HOSTNAME" 'echo "hello"'`
+  if [ ! -z $REPLY ]; then
+   break;
+  fi
+  sleep 5
+done
+
+#read -p "Login first? [yes or no]: " answer
+
+if [ "$answer" == "yes" ]; then
+  ssh $SSH_OPTS "root@$HOSTNAME"
+fi
+
+echo "Copying scripts."
+
+# Copy setup scripts
+scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS "$bin"/image/ec2-run-user-data "root@$HOSTNAME:/etc/init.d"
+
+# Copy private key and certificate (for bundling image)
+scp $SSH_OPTS $EC2_KEYDIR/pk*.pem "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS $EC2_KEYDIR/cert*.pem "root@$HOSTNAME:/mnt"
+
+# Connect to it
+ssh $SSH_OPTS "root@$HOSTNAME" '/mnt/create-hadoop-image-remote'
+
+# Register image
+ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml
+
+echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE"

+ 60 - 0
common/src/contrib/ec2/bin/delete-hadoop-cluster

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Delete the groups and local files associated with a cluster.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Finding Hadoop clusters
+CLUSTERS=`ec2-describe-instances | \
+  awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | \
+  grep "$CLUSTER" | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
+  
+if [ -n "$CLUSTERS" ]; then
+  echo "Cluster $CLUSTER has running instances. Please terminate them first."
+  exit 0
+fi
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+rm -f $MASTER_IP_PATH
+rm -f $MASTER_PRIVATE_IP_PATH
+
+if ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
+  if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+    echo "Revoking authorization between $CLUSTER_MASTER and $CLUSTER"
+    ec2-revoke $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID || true
+    ec2-revoke $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID || true
+  fi
+  echo "Deleting group $CLUSTER_MASTER"
+  ec2-delete-group $CLUSTER_MASTER
+fi
+
+if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+  echo "Deleting group $CLUSTER"
+  ec2-delete-group $CLUSTER
+fi

+ 65 - 0
common/src/contrib/ec2/bin/hadoop-ec2

@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+echo "DEPRECATED. See http://incubator.apache.org/whirr." >&2
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Usage: hadoop-ec2 COMMAND"
+  echo "where COMMAND is one of:"
+  echo "  list                                 list all running Hadoop EC2 clusters"
+  echo "  launch-cluster <group> <num slaves>  launch a cluster of Hadoop EC2 instances - launch-master then launch-slaves"
+  echo "  launch-master  <group>               launch or find a cluster master"
+  echo "  launch-slaves  <group> <num slaves>  launch the cluster slaves"
+  echo "  terminate-cluster  <group>           terminate all Hadoop EC2 instances"
+  echo "  delete-cluster <group>               delete the group information for a terminated cluster"
+  echo "  login  <group|instance id>           login to the master node of the Hadoop EC2 cluster"
+  echo "  screen <group|instance id>           start or attach 'screen' on the master node of the Hadoop EC2 cluster"
+  echo "  proxy  <group|instance id>           start a socks proxy on localhost:6666 (use w/foxyproxy)"
+  echo "  push   <group> <file>                scp a file to the master node of the Hadoop EC2 cluster"
+  echo "  <shell cmd> <group|instance id>      execute any command remotely on the master"
+  echo "  create-image                         create a Hadoop AMI"
+  exit 1
+fi
+
+# get arguments
+COMMAND="$1"
+shift
+
+if [ "$COMMAND" = "create-image" ] ; then
+  . "$bin"/create-hadoop-image $*
+elif [ "$COMMAND" = "launch-cluster" ] ; then
+  . "$bin"/launch-hadoop-cluster $*
+elif [ "$COMMAND" = "launch-master" ] ; then
+  . "$bin"/launch-hadoop-master $*
+elif [ "$COMMAND" = "launch-slaves" ] ; then
+  . "$bin"/launch-hadoop-slaves $*
+elif [ "$COMMAND" = "delete-cluster" ] ; then
+  . "$bin"/delete-hadoop-cluster $*
+elif [ "$COMMAND" = "terminate-cluster" ] ; then
+  . "$bin"/terminate-hadoop-cluster $*
+elif [ "$COMMAND" = "list" ] ; then
+  . "$bin"/list-hadoop-clusters
+else
+  . "$bin"/cmd-hadoop-cluster "$COMMAND" $*
+fi
+

+ 93 - 0
common/src/contrib/ec2/bin/hadoop-ec2-env.sh.template

@@ -0,0 +1,93 @@
+# Set environment variables for running Hadoop on Amazon EC2 here. All are required.
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Your Amazon Account Number.
+AWS_ACCOUNT_ID=
+
+# Your Amazon AWS access key.
+AWS_ACCESS_KEY_ID=
+
+# Your Amazon AWS secret access key.
+AWS_SECRET_ACCESS_KEY=
+
+# Location of EC2 keys.
+# The default setting is probably OK if you set up EC2 following the Amazon Getting Started guide.
+EC2_KEYDIR=`dirname "$EC2_PRIVATE_KEY"`
+
+# The EC2 key name used to launch instances.
+# The default is the value used in the Amazon Getting Started guide.
+KEY_NAME=gsg-keypair
+
+# Where your EC2 private key is stored (created when following the Amazon Getting Started guide).
+# You need to change this if you don't store this with your other EC2 keys.
+PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"`
+
+# SSH options used when connecting to EC2 instances.
+SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o ServerAliveInterval=30`
+
+# The version of Hadoop to use.
+HADOOP_VERSION=0.19.0
+
+# The Amazon S3 bucket where the Hadoop AMI is stored.
+# The default value is for public images, so it can be left as-is if you are running a public image.
+# Change this value only if you are creating your own (private) AMI
+# so you can store it in a bucket you own.
+S3_BUCKET=hadoop-images
+
+# Enable public access to JobTracker and TaskTracker web interfaces
+ENABLE_WEB_PORTS=true
+
+# The script to run on instance boot.
+USER_DATA_FILE=hadoop-ec2-init-remote.sh
+
+# The EC2 instance type: m1.small, m1.large, m1.xlarge
+INSTANCE_TYPE="m1.small"
+#INSTANCE_TYPE="m1.large"
+#INSTANCE_TYPE="m1.xlarge"
+#INSTANCE_TYPE="c1.medium"
+#INSTANCE_TYPE="c1.xlarge"
+
+# The EC2 group master name. CLUSTER is set by calling scripts
+CLUSTER_MASTER=$CLUSTER-master
+
+# Cached values for a given cluster
+MASTER_PRIVATE_IP_PATH=~/.hadoop-private-$CLUSTER_MASTER
+MASTER_IP_PATH=~/.hadoop-$CLUSTER_MASTER
+MASTER_ZONE_PATH=~/.hadoop-zone-$CLUSTER_MASTER
+
+#
+# The following variables are only used when creating an AMI.
+#
+
+# The version number of the installed JDK.
+JAVA_VERSION=1.6.0_07
+
+# SUPPORTED_ARCHITECTURES = ['i386', 'x86_64']
+# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file".
+if [ "$INSTANCE_TYPE" == "m1.small" -o "$INSTANCE_TYPE" == "c1.medium" ]; then
+  ARCH='i386'
+  BASE_AMI_IMAGE="ami-2b5fba42"  # ec2-public-images/fedora-8-i386-base-v1.07.manifest.xml
+  JAVA_BINARY_URL=''
+else
+  ARCH='x86_64'
+  BASE_AMI_IMAGE="ami-2a5fba43"  # ec2-public-images/fedora-8-x86_64-base-v1.07.manifest.xml
+  JAVA_BINARY_URL=''
+fi
+
+if [ "$AMI_KERNEL" != "" ]; then
+  KERNEL_ARG="--kernel ${AMI_KERNEL}"
+fi

+ 171 - 0
common/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh

@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+
+################################################################################
+# Script that is run on each EC2 instance on boot. It is passed in the EC2 user
+# data, so should not exceed 16K in size.
+################################################################################
+
+################################################################################
+# Initialize variables
+################################################################################
+
+# Slaves are started after the master, and are told its address by sending a
+# modified copy of this file which sets the MASTER_HOST variable. 
+# A node  knows if it is the master or not by inspecting the security group
+# name. If it is the master then it retrieves its address using instance data.
+MASTER_HOST=%MASTER_HOST% # Interpolated before being sent to EC2 node
+SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
+IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
+if [ "$IS_MASTER" == "true" ]; then
+ # use public hostnames for master. private hostnames can be used by substituting:
+ # MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
+ MASTER_HOST=`wget -q -O - 'http://169.254.169.254/latest/meta-data/public-hostname'`
+fi
+
+HADOOP_HOME=`ls -d /usr/local/hadoop-*`
+
+################################################################################
+# Hadoop configuration
+# Modify this section to customize your Hadoop cluster.
+################################################################################
+
+cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>/mnt/hadoop</value>
+</property>
+
+<property>
+  <name>fs.default.name</name>
+  <value>hdfs://$MASTER_HOST:50001</value>
+</property>
+
+<property>
+  <name>mapred.job.tracker</name>
+  <value>hdfs://$MASTER_HOST:50002</value>
+</property>
+
+<property>
+  <name>tasktracker.http.threads</name>
+  <value>80</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.map.tasks.maximum</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.reduce.tasks.maximum</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>mapred.output.compress</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>mapred.output.compression.type</name>
+  <value>BLOCK</value>
+</property>
+
+<property>
+  <name>dfs.client.block.write.retries</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>hadoop.rpc.socket.factory.class.default</name>
+  <value>org.apache.hadoop.net.StandardSocketFactory</value>
+  <final>true</final>
+</property>
+
+</configuration>
+EOF
+
+# Configure Hadoop for Ganglia
+# overwrite hadoop-metrics.properties
+cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
+
+# Ganglia
+# we push to the master gmond so hostnames show up properly
+dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+dfs.period=10
+dfs.servers=$MASTER_HOST:8649
+
+mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+mapred.period=10
+mapred.servers=$MASTER_HOST:8649
+
+jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+jvm.period=10
+jvm.servers=$MASTER_HOST:8649
+EOF
+
+################################################################################
+# Start services
+################################################################################
+
+[ ! -f /etc/hosts ] &&  echo "127.0.0.1 localhost" > /etc/hosts
+
+mkdir -p /mnt/hadoop/logs
+
+# not set on boot
+export USER="root"
+
+if [ "$IS_MASTER" == "true" ]; then
+  # MASTER
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\( *mute *=.*\)|  mute = yes|" \
+         -e "s|\( *location *=.*\)|  location = \"master-node\"|" \
+         /etc/gmond.conf
+  mkdir -p /mnt/ganglia/rrds
+  chown -R ganglia:ganglia /mnt/ganglia/rrds
+  rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd
+  service gmond start
+  service gmetad start
+  apachectl start
+
+  # Hadoop
+  # only format on first boot
+  [ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format
+
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker
+else
+  # SLAVE
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\(udp_send_channel {\)|\1\n  host=$MASTER_HOST|" \
+         /etc/gmond.conf
+  service gmond start
+
+  # Hadoop
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker
+fi
+
+# Run this script on next boot
+rm -f /var/ec2/ec2-run-user-data.*

+ 80 - 0
common/src/contrib/ec2/bin/image/create-hadoop-image-remote

@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Create a Hadoop AMI. Runs on the EC2 instance.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Remove environment script since it contains sensitive information
+rm -f "$bin"/hadoop-ec2-env.sh
+
+# Install Java
+echo "Downloading and installing java binary."
+cd /usr/local
+wget -nv -O java.bin $JAVA_BINARY_URL
+sh java.bin
+rm -f java.bin
+
+# Install tools
+echo "Installing rpms."
+yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
+yum -y clean all
+
+# Install Hadoop
+echo "Installing Hadoop $HADOOP_VERSION."
+cd /usr/local
+wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+tar xzf hadoop-$HADOOP_VERSION.tar.gz
+rm -f hadoop-$HADOOP_VERSION.tar.gz
+
+# Configure Hadoop
+sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
+       -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
+       -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
+       -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
+      /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
+
+# Run user data as script on instance startup
+chmod +x /etc/init.d/ec2-run-user-data
+echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local
+
+# Setup root user bash environment
+echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
+echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
+echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile
+
+# Configure networking.
+# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
+rm -f /root/.ssh/authorized_keys
+# Ensure logging in to new hosts is seamless.
+echo '    StrictHostKeyChecking no' >> /etc/ssh/ssh_config
+
+# Bundle and upload image
+cd ~root
+# Don't need to delete .bash_history since it isn't written until exit.
+df -h
+ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH
+
+ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
+
+# End
+echo Done

+ 63 - 0
common/src/contrib/ec2/bin/image/ec2-run-user-data

@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# ec2-run-user-data - Run instance user-data if it looks like a script.
+#
+# Only retrieves and runs the user-data script once per instance.  If
+# you want the user-data script to run again (e.g., on the next boot)
+# then add this command in the user-data script:
+#   rm -f /var/ec2/ec2-run-user-data.*
+#
+# History:
+#   2008-05-16 Eric Hammond <ehammond@thinksome.com>
+#   - Initial version including code from Kim Scheibel, Jorge Oliveira
+#   2008-08-06 Tom White
+#   - Updated to use mktemp on fedora
+#
+
+prog=$(basename $0)
+logger="logger -t $prog"
+curl="curl --retry 3 --silent --show-error --fail"
+instance_data_url=http://169.254.169.254/2008-02-01
+
+# Wait until networking is up on the EC2 instance.
+perl -MIO::Socket::INET -e '
+ until(new IO::Socket::INET("169.254.169.254:80")){print"Waiting for network...\n";sleep 1}
+' | $logger
+
+# Exit if we have already run on this instance (e.g., previous boot).
+ami_id=$($curl $instance_data_url/meta-data/ami-id)
+been_run_file=/var/ec2/$prog.$ami_id
+mkdir -p $(dirname $been_run_file)
+if [ -f $been_run_file ]; then
+  $logger < $been_run_file
+  exit
+fi
+
+# Retrieve the instance user-data and run it if it looks like a script
+user_data_file=`mktemp -t ec2-user-data.XXXXXXXXXX`
+chmod 700 $user_data_file
+$logger "Retrieving user-data"
+$curl -o $user_data_file $instance_data_url/user-data 2>&1 | $logger
+if [ ! -s $user_data_file ]; then
+  $logger "No user-data available"
+elif head -1 $user_data_file | egrep -v '^#!'; then
+  $logger "Skipping user-data as it does not begin with #!"
+else
+  $logger "Running user-data"
+  echo "user-data has already been run on this instance" > $been_run_file
+  $user_data_file 2>&1 | logger -t "user-data"
+  $logger "user-data exit code: $?"
+fi
+rm -f $user_data_file

+ 42 - 0
common/src/contrib/ec2/bin/launch-hadoop-cluster

@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch an EC2 cluster of Hadoop instances.
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
+
+if ! "$bin"/launch-hadoop-master $1 ; then
+  exit $?
+fi
+
+if ! "$bin"/launch-hadoop-slaves $*; then
+  exit $?
+fi

+ 119 - 0
common/src/contrib/ec2/bin/launch-hadoop-master

@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch an EC2 Hadoop master.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [ -z $AWS_ACCOUNT_ID ]; then
+  echo "Please set AWS_ACCOUNT_ID in $bin/hadoop-ec2-env.sh."
+  exit -1
+fi
+
+echo "Testing for existing master in group: $CLUSTER"
+MASTER_EC2_HOST=`ec2-describe-instances | awk '"RESERVATION" == $1 && "'$CLUSTER_MASTER'" == $4, "RESERVATION" == $1 && "'$CLUSTER_MASTER'" != $4'`
+MASTER_EC2_HOST=`echo "$MASTER_EC2_HOST" | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'`
+
+if [ ! -z "$MASTER_EC2_HOST" ]; then
+  echo "Master already running on: $MASTER_EC2_HOST"
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_EC2_HOST | awk '{print $5}'`
+  echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+  echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+  exit 0
+fi
+
+if ! ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
+  echo "Creating group $CLUSTER_MASTER"
+  ec2-add-group $CLUSTER_MASTER -d "Group for Hadoop Master."
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER_MASTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER_MASTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER_MASTER -p 50060 # TaskTracker web interface
+    ec2-authorize $CLUSTER_MASTER -p 50070 # NameNode web interface
+    ec2-authorize $CLUSTER_MASTER -p 50075 # DataNode web interface
+  fi
+fi
+
+if ! ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+  echo "Creating group $CLUSTER"
+  ec2-add-group $CLUSTER -d "Group for Hadoop Slaves."
+  ec2-authorize $CLUSTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER -p 50060 # TaskTracker web interface
+    ec2-authorize $CLUSTER -p 50070 # NameNode web interface
+    ec2-authorize $CLUSTER -p 50075 # DataNode web interface
+  fi
+
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+# Start a master
+echo "Starting master with AMI $AMI_IMAGE"
+USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS"
+INSTANCE=`ec2-run-instances $AMI_IMAGE -n 1 -g $CLUSTER_MASTER -k $KEY_NAME -f "$bin"/$USER_DATA_FILE -t $INSTANCE_TYPE $KERNEL_ARG | grep INSTANCE | awk '{print $2}'`
+echo "Waiting for instance $INSTANCE to start"
+while true; do
+  printf "."
+  # get private dns
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep running | awk '{print $5}'`
+  if [ ! -z $MASTER_HOST ]; then
+    echo "Started as $MASTER_HOST"
+    break;
+  fi
+  sleep 1
+done
+
+MASTER_EC2_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $4}'`
+echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+MASTER_EC2_ZONE=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $11}'`
+echo $MASTER_EC2_ZONE > $MASTER_ZONE_PATH
+
+while true; do
+  if ssh $SSH_OPTS "root@$MASTER_EC2_HOST" 'echo "hello"' > /dev/null 2>&1; then
+   break;
+  fi
+  sleep 5
+done
+
+echo "Copying private key to master"
+scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_EC2_HOST:/root/.ssh/id_rsa"
+ssh $SSH_OPTS "root@$MASTER_EC2_HOST" "chmod 600 /root/.ssh/id_rsa"
+
+MASTER_IP=`dig +short $MASTER_EC2_HOST`
+echo "Master is $MASTER_EC2_HOST, ip is $MASTER_IP, zone is $MASTER_EC2_ZONE."

+ 59 - 0
common/src/contrib/ec2/bin/launch-hadoop-slaves

@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch EC2 Hadoop slaves.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
+
+CLUSTER=$1
+NO_INSTANCES=$2
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [ ! -f $MASTER_IP_PATH ]; then
+  echo "Must start Cluster Master first!"
+  exit -1
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH |grep available | awk '{print $2}'`
+# to use private master hostname, substitute below with:
+# MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
+MASTER_HOST=`cat $MASTER_IP_PATH`
+MASTER_ZONE=`cat $MASTER_ZONE_PATH`
+
+# Substituting master hostname
+sed -e "s|%MASTER_HOST%|$MASTER_HOST|" "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave
+
+# Start slaves
+echo "Adding $1 node(s) to cluster group $CLUSTER with AMI $AMI_IMAGE"
+ec2-run-instances $AMI_IMAGE -n "$NO_INSTANCES" -g "$CLUSTER" -k "$KEY_NAME" -f "$bin"/$USER_DATA_FILE.slave -t "$INSTANCE_TYPE" -z "$MASTER_ZONE" $KERNEL_ARG | grep INSTANCE | awk '{print $2}'
+
+rm "$bin"/$USER_DATA_FILE.slave

+ 33 - 0
common/src/contrib/ec2/bin/list-hadoop-clusters

@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List running clusters.
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Finding Hadoop clusters
+CLUSTERS=`ec2-describe-instances | awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
+
+[ -z "$CLUSTERS" ] && echo "No running clusters." && exit 0
+
+echo "Running Hadoop clusters:"
+echo "$CLUSTERS"

+ 48 - 0
common/src/contrib/ec2/bin/terminate-hadoop-cluster

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Terminate a cluster.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Finding Hadoop image
+HADOOP_INSTANCES=`ec2-describe-instances | awk '"RESERVATION" == $1 && ("'$CLUSTER'" == $4 || "'$CLUSTER_MASTER'" == $4), "RESERVATION" == $1 && ("'$CLUSTER'" != $4 && "'$CLUSTER_MASTER'" != $4)'`
+HADOOP_INSTANCES=`echo "$HADOOP_INSTANCES" | grep INSTANCE | grep running`
+
+[ -z "$HADOOP_INSTANCES" ] && echo "No running instances in cluster $CLUSTER." && exit 0
+
+echo "Running Hadoop instances:"
+echo "$HADOOP_INSTANCES"
+read -p "Terminate all instances? [yes or no]: " answer
+
+if [ "$answer" != "yes" ]; then
+  exit 1
+fi
+
+ec2-terminate-instances `echo "$HADOOP_INSTANCES" | awk '{print $2}'`

+ 97 - 0
common/src/contrib/failmon/README

@@ -0,0 +1,97 @@
+****************** FailMon Quick Start Guide ***********************
+
+This document is a guide to quickly setting up and running FailMon.
+For more information and details please see the FailMon User Manual.
+
+***** Building FailMon *****
+
+Normally, FailMon lies under <hadoop-dir>/src/contrib/failmon, where
+<hadoop-dir> is the Hadoop project root folder. To compile it,
+one can either run ant for the whole Hadoop project, i.e.:
+
+$ cd <hadoop-dir>
+$ ant
+
+or run ant only for FailMon:
+
+$ cd <hadoop-dir>/src/contrib/failmon
+$ ant
+
+The above will compile FailMon and place all class files under
+<hadoop-dir>/build/contrib/failmon/classes.
+
+By invoking:
+
+$ cd <hadoop-dir>/src/contrib/failmon
+$ ant tar
+
+FailMon is packaged as a standalone jar application in
+<hadoop-dir>/src/contrib/failmon/failmon.tar.gz.
+
+
+***** Deploying FailMon *****
+
+There are two ways FailMon can be deployed in a cluster:
+
+a) Within Hadoop, in which case the whole Hadoop package is uploaded
+to the cluster nodes. In that case, nothing else needs to be done on
+individual nodes.
+
+b) Independently of the Hadoop deployment, i.e., by uploading
+failmon.tar.gz to all nodes and uncompressing it. In that case, the
+bin/failmon.sh script needs to be edited; environment variable
+HADOOPDIR should point to the root directory of the Hadoop
+distribution. Also, the location of the Hadoop configuration files
+should be pointed to by the property 'hadoop.conf.path' in the file
+conf/failmon.properties. Note that these files refer to the HDFS in
+which we want to store the FailMon data (which can potentially be
+different from the one on the cluster we are monitoring).
+
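+For example, a standalone deployment that keeps FailMon under
+/opt/failmon and uses a Hadoop installation under /opt/hadoop would
+require edits along these lines (the paths are purely illustrative):
+
+  # in bin/failmon.sh
+  HADOOPDIR=/opt/hadoop
+
+  # in conf/failmon.properties
+  hadoop.conf.path = /opt/hadoop/conf
+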
+We assume that either way FailMon is placed in the same directory on
+all nodes, which is typical for most clusters. If this is not
+feasible, one should create the same symbolic link on all nodes of the
+cluster that points to the FailMon directory of each node.
+
+One should also edit the conf/failmon.properties file on each node to
+set site-specific property values. However, the default values are expected
+to serve most practical cases. Refer to the FailMon User Manual about
+the various properties and configuration parameters.
+
+
+***** Running FailMon *****
+
+In order to run FailMon using a node to do the ad-hoc scheduling of
+monitoring jobs, one needs to edit the hosts.list file to specify the
+list of machine hostnames on which FailMon is to be run. Also, in file
+conf/global.config the username used to connect to the machines has to
+be specified (passwordless SSH is assumed) in property 'ssh.username'.
+In property 'failmon.dir', the path to the FailMon folder has to be
+specified as well (it is assumed to be the same on all machines in the
+cluster). Then one only needs to invoke the command:
+
+$ cd <hadoop-dir>
+$ bin/scheduler.py
+
+to start the system.
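+
+For instance, for a cluster whose nodes are reached as user 'hadoop'
+and keep FailMon under /home/hadoop/failmon, conf/global.config would
+contain, among its other properties (the username and path here are
+only illustrative):
+
+  ssh.username = hadoop
+  failmon.dir = /home/hadoop/failmon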
+
+
+***** Merging HDFS files *****
+
+For the purpose of merging the files created on HDFS by FailMon, the
+following command can be used:
+
+$ cd <hadoop-dir>
+$ bin/failmon.sh --mergeFiles
+
+This will concatenate all files in the HDFS folder (pointed to by the
+'hdfs.upload.dir' property in conf/failmon.properties file) into a
+single file, which will be placed in the same folder. Also the
+location of the Hadoop configuration files should be pointed to by the
+property 'hadoop.conf.path' in the file conf/failmon.properties. Note that
+these files refer to the HDFS in which we have stored the FailMon data
+(which can potentially be different from the one on the cluster we are
+monitoring). Also, the scheduler.py script can be set up to merge the
+HDFS files when their number surpasses a configurable limit (see
+'conf/global.config' file).
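+
+For example, to have the merge triggered once more than 50 files have
+accumulated on HDFS, one could set in conf/global.config (the value 50
+is only illustrative):
+
+  hdfs.files.max = 50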
+
+Please refer to the FailMon User Manual for more details.

+ 54 - 0
common/src/contrib/failmon/bin/failmon.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# First we need to determine whether FailMon has been distributed with
+# Hadoop, or as standalone. In the latter case failmon.jar will lie in
+# the current directory.
+
+JARNAME="failmon.jar"
+HADOOPDIR=""
+CLASSPATH=""
+
+if [ `ls -l | grep src | wc -l` == 0 ]
+then
+    # standalone binary
+    if [ -n $1 ] && [ "$1" == "--mergeFiles" ]
+    then
+	jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.HDFSMerger
+        java -jar $JARNAME
+    else
+    	jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.RunOnce
+	java -jar $JARNAME $*
+    fi
+else
+    # distributed with Hadoop
+    HADOOPDIR=`pwd`/../../../
+    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/contrib/failmon/classes
+    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/classes
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-api-1*.jar`
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-1*.jar`
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/log4j-*.jar`
+#    echo $CLASSPATH
+    if [ -n $1 ] && [ "$1" == "--mergeFiles" ]
+    then
+        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.HDFSMerger
+    else
+        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.RunOnce $*
+    fi
+fi
+

+ 235 - 0
common/src/contrib/failmon/bin/scheduler.py

@@ -0,0 +1,235 @@
+#!/usr/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Schedule FailMon execution for nodes of file hosts.list, according to
+# the properties file conf/global.config.
+
+import time
+import ConfigParser
+import subprocess
+import threading
+import random
+
+jobs = []
+username = "user"
+connections = 10
+failmonDir = ""
+maxFiles = 100
+
+# This class represents a thread that connects to a set of cluster
+# nodes to locally execute monitoring jobs. These jobs are specified
+# as a shell command in the constructor.
+class sshThread (threading.Thread):
+
+    def __init__(self, threadname, username, command, failmonDir):
+        threading.Thread.__init__(self)
+        self.name = threadname
+        self.username = username
+        self.command = command
+        self.failmonDir = failmonDir
+        self.hosts = []
+
+    def addHost(self, host):
+        self.hosts.append(host)
+        
+    def run (self):
+        for host in self.hosts:
+            toRun = ["ssh", self.username + "@" + host, "cd " + self.failmonDir + " ; " + self.command]
+            print "Thread", self.name, "invoking command on", host, ":\t", toRun, "...",
+            subprocess.check_call(toRun)
+            print "Done!"
+
+# This class represents a monitoring job. The param member is a string
+# that can be passed in the '--only' list of jobs given to the Java
+# class org.apache.hadoop.contrib.failmon.RunOnce for execution on a
+# node.
+class Job:
+    def __init__(self, param, interval):
+        self.param = param
+        self.interval = interval
+        self.counter = interval
+        return
+
+    def reset(self):
+        self.counter = self.interval
+
+# This function reads the configuration file to get the values of the
+# configuration parameters.
+def getJobs(file):
+    global username
+    global connections
+    global jobs
+    global failmonDir
+    global maxFiles
+    
+    conf = ConfigParser.SafeConfigParser()
+    conf.read(file)
+
+    username = conf.get("Default", "ssh.username")
+    connections = int(conf.get("Default", "max.connections"))
+    failmonDir = conf.get("Default", "failmon.dir")
+    maxFiles = conf.get("Default", "hdfs.files.max")
+    
+    # Hadoop Log
+    interval = int(conf.get("Default", "log.hadoop.interval"))
+
+    if interval != 0:
+        jobs.append(Job("hadoopLog", interval))
+
+    # System Log
+    interval = int(conf.get("Default", "log.system.interval"))
+
+    if interval != 0:
+        jobs.append(Job("systemLog", interval))
+
+    # NICs
+    interval = int(conf.get("Default", "nics.interval"))
+
+    if interval != 0:
+        jobs.append(Job("nics", interval))
+
+    # CPU
+    interval = int(conf.get("Default", "cpu.interval"))
+
+    if interval != 0:
+        jobs.append(Job("cpu", interval))
+
+    # Disks
+    interval = int(conf.get("Default", "disks.interval"))
+
+    if interval != 0:
+        jobs.append(Job("disks", interval))
+
+    # sensors
+    interval = int(conf.get("Default", "sensors.interval"))
+
+    if interval != 0:
+        jobs.append(Job("sensors", interval))
+
+    # upload
+    interval = int(conf.get("Default", "upload.interval"))
+
+    if interval != 0:
+        jobs.append(Job("upload", interval))
+
+    return
+
+
+# Compute the gcd (Greatest Common Divisor) of two integers
+def GCD(a, b):
+    assert isinstance(a, int)
+    assert isinstance(b, int)
+
+    while a:
+        a, b = b%a, a
+
+    return b
+
+# Compute the gcd (Greatest Common Divisor) of a list of integers
+def listGCD(joblist):
+    assert isinstance(joblist, list)
+
+    if (len(joblist) == 1):
+        return joblist[0].interval
+
+    g = GCD(joblist[0].interval, joblist[1].interval)
+
+    for i in range (2, len(joblist)):
+        g = GCD(g, joblist[i].interval)
+        
+    return g
+
+# Merge all failmon files created on the HDFS into a single file
+def mergeFiles():
+    global username
+    global failmonDir
+    hostList = []
+    hosts = open('./conf/hosts.list', 'r')
+    for host in hosts:
+        hostList.append(host.strip().rstrip())
+    randomHost = random.sample(hostList, 1)
+    mergeCommand = "bin/failmon.sh --mergeFiles"
+    toRun = ["ssh", username + "@" + randomHost[0], "cd " + failmonDir + " ; " + mergeCommand]
+    print "Invoking command on", randomHost, ":\t", mergeCommand, "...",
+    subprocess.check_call(toRun)
+    print "Done!"
+    return
+
+# The actual scheduling is done here
+def main():
+    getJobs("./conf/global.config")
+
+    for job in jobs:
+        print "Configuration: ", job.param, "every", job.interval, "seconds"
+        
+    globalInterval = listGCD(jobs)
+        
+    while True :
+        time.sleep(globalInterval)
+        params = []
+        
+        for job in jobs:
+            job.counter -= globalInterval
+            
+            if (job.counter <= 0):
+                params.append(job.param)
+                job.reset()
+                
+        if (len(params) == 0):
+            continue;
+                    
+        onlyStr = "--only " + params[0]
+        for i in range(1, len(params)):
+            onlyStr += ',' + params[i] 
+                
+        command = "bin/failmon.sh " + onlyStr
+
+        # execute on all nodes
+        hosts = open('./conf/hosts.list', 'r')
+        threadList = []
+        # create a thread for every connection
+        for i in range(0, connections):
+            threadList.append(sshThread(i, username, command, failmonDir))
+
+        # assign some hosts/connections hosts to every thread
+        cur = 0;
+        for host in hosts:
+            threadList[cur].addHost(host.strip().rstrip())
+            cur += 1
+            if (cur == len(threadList)):
+                cur = 0    
+
+        for ready in threadList:
+            ready.start()
+
+        for ssht in threading.enumerate():
+            if ssht != threading.currentThread():
+                ssht.join()
+
+        # if an upload has been done, then maybe we need to merge the
+        # HDFS files
+        if "upload" in params:
+            mergeFiles()
+
+    return
+
+
+if __name__ == '__main__':
+    main()
+

+ 120 - 0
common/src/contrib/failmon/build.xml

@@ -0,0 +1,120 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="failmon" default="compile">
+
+  <import file="../build-contrib.xml"/>
+
+  <property name="jarfile" value="${build.dir}/${name}.jar"/>
+
+  <target name="jar" depends="compile" unless="skip.contrib">
+    <!-- Make sure that the hadoop jar has been created -->
+<!-- This works, but causes findbugs to fail
+    <subant antfile="build.xml" target="jar">
+      <fileset dir="../../.." includes="build.xml"/>
+    </subant>
+-->
+    <!-- Copy the required files so that the jar can run independently
+	 of Hadoop source code -->
+    
+  <!-- create the list of files to add to the classpath -->
+  <fileset dir="${hadoop.root}/lib" id="class.path">
+    <include name="**/*.jar" />
+    <exclude name="**/excluded/" />
+  </fileset>
+  
+  <pathconvert pathsep=" " property="failmon-class-path" refid="class.path">
+    <map from="${basedir}/" to=""/>
+  </pathconvert>
+
+    <echo message="contrib: ${name}"/>
+    <jar jarfile="${jarfile}" basedir="${build.classes}">
+      <manifest>
+        <attribute name="Main-Class" value="org.apache.hadoop.contrib.failmon.RunOnce"/>
+	<attribute name="Class-Path" value="${failmon-class-path}"/> 
+      </manifest>
+    </jar>
+
+  </target>
+
+  
+  <!-- Override test target to copy sample data -->
+  <target name="test" depends="compile-test, compile, compile-examples" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${hadoop.log.dir}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <delete dir="${build.test}/sample"/>
+    <mkdir dir="${build.test}/sample"/>
+    <copy todir="${build.test}/sample">
+      <fileset dir="${root}/sample"/>
+    </copy>
+    <junit
+      printsummary="yes" showoutput="${test.output}" 
+      haltonfailure="no" fork="yes" maxmemory="256m"
+      errorProperty="tests.failed" failureProperty="tests.failed"
+      timeout="${test.timeout}">
+      
+      <sysproperty key="test.build.data" value="${build.test}/data"/>
+      <sysproperty key="build.test" value="${build.test}"/>
+      <sysproperty key="contrib.name" value="${name}"/>
+      
+      <!-- requires fork=yes for: 
+        relative File paths to use the specified user.dir 
+        classpath to use build/contrib/*.jar
+      -->
+      <sysproperty key="user.dir" value="${build.test}/data"/>
+      
+      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
+      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
+      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>
+  
+  <target name="tar" depends="jar">
+
+    <copy todir=".">
+      <fileset dir="${hadoop.root}/build/contrib/failmon/"
+	       includes="failmon.jar"/>
+    </copy>
+    
+    <tar tarfile="${name}.tar" 
+	 basedir=".." 
+	 includes="${name}/**"
+	 excludes="${name}/${name}.tar.gz, ${name}/src/**, ${name}/logs/**, ${name}/build.xml*"/>
+    <gzip zipfile="${name}.tar.gz" src="${name}.tar"/>
+    <delete file="${name}.tar"/>
+    <delete file="${name}.jar"/>
+
+    <move file="${name}.tar.gz" todir="${build.dir}"/>
+    <echo message= "${hadoop.root}/build/contrib/failmon/${name}.jar"/>
+    
+  </target>
+  
+</project>

+ 25 - 0
common/src/contrib/failmon/conf/commons-logging.properties

@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#Logging Implementation
+
+#Log4J
+org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger
+
+#JDK Logger
+#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

+ 80 - 0
common/src/contrib/failmon/conf/failmon.properties

@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# HDFS properties
+hdfs.upload.dir = /failmon
+hadoop.conf.path = ../../../conf
+
+# Hadoop Log file properties
+log.hadoop.enabled = true
+log.hadoop.filenames = /home/hadoop/hadoop-0.17.0/logs/
+# set to non-zero only for continuous mode:
+log.hadoop.interval = 0
+log.hadoop.dateformat = \\d{4}-\\d{2}-\\d{2}
+log.hadoop.timeformat = \\d{2}:\\d{2}:\\d{2}
+
+# System Log file properties
+log.system.enabled = true
+log.system.filenames = /var/log/messages
+# set to non-zero only for continuous mode:
+log.system.interval = 0
+log.system.dateformat = (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)
+log.system.timeformat = \\d{2}:\\d{2}:\\d{2}
+
+# Network Interfaces
+nic.enabled = true
+nic.list = eth0, eth1
+# set to non-zero only for continuous mode:
+nic.interval = 0
+
+# CPUs & Motherboard
+cpu.enabled = true
+# set to non-zero only for continuous mode:
+cpu.interval = 0
+
+# Disk devices. For all devices listed under disks.list, the corresponding
+# property disk./dev/xxx.source specifies where the output of 
+# "sudo smartctl --all /dev/xxx" can be read by a user. If this property is
+# missing, super-user privileges are assumed and the smartctl command will be 
+# invoked itself.
+
+disks.enabled = true
+disks.list = /dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/hda, /dev/hdb, /dev/hdc, /dev/hdd
+#disks./dev/sda.source = hda.smart
+# set to non-zero only for continuous mode:
+disks.interval = 0
+
+# lm-sensors polling
+sensors.enabled = true
+# set to non-zero only for continuous mode:
+sensors.interval = 0
+
+# Executor thread properties	
+executor.interval.min = 1	
+
+# Anonymization properties
+anonymizer.hash.hostnames = false
+anonymizer.hash.ips = false
+anonymizer.hash.filenames = false
+anonymizer.hostname.suffix = apache.org
+
+# Local files options
+local.tmp.filename = failmon.dat
+local.tmp.compression = false
+# set to non-zero only for continuous mode:
+local.upload.interval = 0

+ 39 - 0
common/src/contrib/failmon/conf/global.config

@@ -0,0 +1,39 @@
+[Default]
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# general settings
+
+# the username to use to connect to cluster nodes
+ssh.username = user
+# the maximum number of SSH connections to keep open at any time
+max.connections = 2
+# the directory in which FailMon resides
+failmon.dir = /home/user/hadoop-core-trunk/src/contrib/failmon
+# the maximum number of HDFS files FailMon is allowed to create. Once
+# this limit is exceeded, all HDFS files will be concatenated into
+# one file.
+hdfs.files.max = 100
+
+# iteration intervals
+log.hadoop.interval = 0
+log.system.interval = 0
+nics.interval = 10
+cpu.interval = 10
+disks.interval = 0
+sensors.interval = 0
+upload.interval = 20

+ 10 - 0
common/src/contrib/failmon/conf/hosts.list

@@ -0,0 +1,10 @@
+host00
+host01
+host02
+host03
+host04
+host05
+host06
+host07
+host08
+host09

+ 40 - 0
common/src/contrib/failmon/conf/log4j.properties

@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Define some default values that can be overridden by system properties
+failmon.log.dir=logs
+failmon.log.file=failmon.log
+
+log4j.rootLogger= INFO, simpleFile, console
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# console
+# "console" is already attached to the rootLogger above; remove it there to disable console output
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+log4j.appender.simpleFile=org.apache.log4j.FileAppender
+log4j.appender.simpleFile.layout=org.apache.log4j.PatternLayout
+log4j.appender.simpleFile.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.simpleFile.file= ${failmon.log.dir}/${failmon.log.file}

+ 52 - 0
common/src/contrib/failmon/ivy.xml

@@ -0,0 +1,52 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Apache Hadoop
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact" />
+
+    <conf name="common" visibility="private" 
+      extends="runtime"
+      description="artifacts needed to compile/test the application"/>
+    <conf name="test" visibility="private" extends="runtime"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="common->default"/>
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="common->master"/>
+  </dependencies>
+</ivy-module>

+ 17 - 0
common/src/contrib/failmon/ivy/libraries.properties

@@ -0,0 +1,17 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by failmon.
+#It drives ivy and the generation of a maven POM
+
+#Please list the dependency names with versions if they differ from the ones
+#listed in the global libraries.properties file (in alphabetical order)

+ 154 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Anonymizer.java

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+/**********************************************************
+ * This class provides anonymization for SerializedRecord objects. It
+ * anonymizes all hostnames, IP addresses and file names/paths
+ * that appear in EventRecords gathered from the logs
+ * and other system utilities. Such values are hashed using the
+ * MD5 one-way hash algorithm.
+ * 
+ **********************************************************/
+
+public class Anonymizer {
+
+  /**
+   * Anonymize hostnames, IP addresses and file names/paths
+   * that appear in fields of a SerializedRecord.
+   *
+   * @param sr the input SerializedRecord
+   *
+   * @return the anonymized SerializedRecord
+   */
+  public static SerializedRecord anonymize(SerializedRecord sr)
+      throws Exception {
+
+    String hostname = sr.get("hostname");
+
+    if (hostname == null)
+      throw new Exception("Malformed SerializedRecord: no hostname found");
+
+    if ("true".equalsIgnoreCase(Environment
+        .getProperty("anonymizer.hash.hostnames"))) {
+      // hash the node's hostname
+      anonymizeField(sr, "message", hostname, "_hn_");
+      anonymizeField(sr, "hostname", hostname, "_hn_");
+      // hash all other hostnames
+      String suffix = Environment.getProperty("anonymizer.hostname.suffix");
+      if (suffix != null)
+        anonymizeField(sr, "message", "(\\S+\\.)*" + suffix, "_hn_");
+    }
+
+    if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.ips"))) {
+      // hash all ip addresses
+      String ipPattern = "(\\d{1,3}\\.){3}\\d{1,3}";
+      anonymizeField(sr, "message", ipPattern, "_ip_");
+      anonymizeField(sr, "ips", ipPattern, "_ip_");
+      // if multiple ips are present for a node:
+      int i = 0;
+      while (sr.get("ips" + "#" + i) != null)
+        anonymizeField(sr, "ips" + "#" + i++, ipPattern, "_ip_");
+
+      if ("NIC".equalsIgnoreCase(sr.get("type")))
+        anonymizeField(sr, "ipAddress", ipPattern, "_ip_");
+    }
+
+    if ("true".equalsIgnoreCase(Environment
+        .getProperty("anonymizer.hash.filenames"))) {
+      // hash every filename present in messages
+      anonymizeField(sr, "message", "\\s+/(\\S+/)*[^:\\s]*", " _fn_");
+      anonymizeField(sr, "message", "\\s+hdfs://(\\S+/)*[^:\\s]*",
+          " hdfs://_fn_");
+    }
+
+    return sr;
+  }
+
+  /**
+   * Anonymize hostnames, ip addresses and file names/paths
+   * that appear in fields of an EventRecord, after it gets
+   * serialized into a SerializedRecord.
+   * 
+   * @param er the input EventRecord
+   * 
+   * @return the anonymized SerializedRecord
+   */   
+  public static SerializedRecord anonymize(EventRecord er) throws Exception {
+    return anonymize(new SerializedRecord(er));
+  }
+
+  
+  private static String anonymizeField(SerializedRecord sr, String fieldName,
+      String pattern, String prefix) {
+    String txt = sr.get(fieldName);
+
+    if (txt == null)
+      return null;
+    else {
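+      // note: the replacement token is the MD5 hash of the pattern itself, so all matches of a given pattern map to the same anonymized value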
+      String anon = getMD5Hash(pattern);
+      sr.set(fieldName, txt.replaceAll(pattern, (prefix == null ? "" : prefix)
+          + anon));
+      return txt;
+    }
+  }
+
+  /**
+   * Create the MD5 digest of an input text.
+   * 
+   * @param text the input text
+   * 
+   * @return the hexadecimal representation of the MD5 digest
+   */   
+  public static String getMD5Hash(String text) {
+    MessageDigest md;
+    byte[] md5hash = new byte[32];
+    try {
+      md = MessageDigest.getInstance("MD5");
+      md.update(text.getBytes("iso-8859-1"), 0, text.length());
+      md5hash = md.digest();
+    } catch (NoSuchAlgorithmException e) {
+      e.printStackTrace();
+    } catch (UnsupportedEncodingException e) {
+      e.printStackTrace();
+    }
+    return convertToHex(md5hash);
+  }
+
+  private static String convertToHex(byte[] data) {
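+    // render each byte as two lowercase hex digits, high nibble first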
+    StringBuilder buf = new StringBuilder();
+    for (int i = 0; i < data.length; i++) {
+      int halfbyte = (data[i] >>> 4) & 0x0F;
+      int two_halfs = 0;
+      do {
+        if ((0 <= halfbyte) && (halfbyte <= 9))
+          buf.append((char) ('0' + halfbyte));
+        else
+          buf.append((char) ('a' + (halfbyte - 10)));
+        halfbyte = data[i] & 0x0F;
+      } while (two_halfs++ < 1);
+    }
+    return buf.toString();
+  }
+
+}

+ 101 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/CPUParser.java

@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.Calendar;
+
+/**********************************************************
+ * Objects of this class parse the /proc/cpuinfo file to 
+ * gather information about present processors in the system.
+ *
+ **********************************************************/
+
+
+public class CPUParser extends ShellParser {
+
+  /**
+   * Constructs a CPUParser
+   */
+  public CPUParser() {
+    super();
+  }
+
+  /**
+   * Reads and parses /proc/cpuinfo and creates an appropriate 
+   * EventRecord that holds the desired information.
+   * 
+   * @param s unused parameter
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String s) throws Exception {
+    CharSequence sb = Environment.runCommandGeneric("cat /proc/cpuinfo");
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "CPU", "Unknown", "CPU", "-");
+
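+    // each call to findAll joins all matches with ", "; e.g. "processors" might read "0, 1" on a two-core machine (illustrative values)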
+    retval.set("processors", findAll("\\s*processor\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("model name", findPattern("\\s*model name\\s*:\\s*(.+)", sb
+        .toString(), 1));
+
+    retval.set("frequency", findAll("\\s*cpu\\s*MHz\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("physical id", findAll("\\s*physical\\s*id\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("core id", findAll("\\s*core\\s*id\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    return retval;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of /proc/cpuinfo
+   */
+  
+  public EventRecord[] monitor() {
+
+    EventRecord[] recs = new EventRecord[1];
+
+    try {
+      recs[0] = query(null);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return recs;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("CPU Info parser");
+  }
+
+}

+ 41 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Continuous.java

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+
+/**********************************************************
+ * This class runs FailMon in a continuous mode on the local
+ * node.
+ * 
+ **********************************************************/
+
+public class Continuous {
+
+  public static void main(String[] args) {
+
+
+    Environment.prepare("failmon.properties");
+
+    Executor ex = new Executor(null);
+    new Thread(ex).start();
+
+  }
+
+}

+ 486 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Environment.java

@@ -0,0 +1,486 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.*;
+import org.apache.log4j.PropertyConfigurator;
+
+/**********************************************************
+ * This class provides various methods for interaction with
+ * the configuration and the operating system environment. Also
+ * provides some helper methods for use by other classes in
+ * the package.
+ **********************************************************/
+
+public class Environment {
+
+  public static final int DEFAULT_LOG_INTERVAL = 3600;
+
+  public static final int DEFAULT_POLL_INTERVAL = 360;
+
+  public static int MIN_INTERVAL = 5;
+
+  public static final int MAX_OUTPUT_LENGTH = 51200;
+
+  public static Log LOG;
+  
+  static Properties fmProperties = new Properties();
+
+  static boolean superuser = false;
+
+  static boolean ready = false;
+
+  /**
+   * Initializes structures needed by other methods. Also determines
+   * whether the executing user has superuser privileges. 
+   *  
+   */
+  public static void prepare(String fname) {
+
+    if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) {
+      System.err.println("Linux system required for FailMon. Exiting...");
+      System.exit(0);
+    }
+
+    System.setProperty("log4j.configuration", "conf/log4j.properties");
+    PropertyConfigurator.configure("conf/log4j.properties");
+    LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon");
+    logInfo("********** FailMon started ***********");
+
+    // read parseState file
+    PersistentState.readState("conf/parsing.state");
+    
+    try {
+      FileInputStream propFile = new FileInputStream(fname);
+      fmProperties.load(propFile);
+      propFile.close();
+    } catch (FileNotFoundException e1) {
+      e1.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    ready = true;
+
+    try {
+      String sudo_prompt = "passwd_needed:";
+      String echo_txt = "access_ok";
+      
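+      // probe for passwordless sudo: run "sudo -S -p <prompt> echo <token>"; the token appearing on stdout means superuser rights, the prompt appearing on stderr means none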
+      Process p = Runtime.getRuntime().exec("sudo -S -p " + sudo_prompt + " echo " + echo_txt );
+      InputStream inps = p.getInputStream();
+      InputStream errs = p.getErrorStream();
+      
+      while (inps.available() < echo_txt.length() && errs.available() < sudo_prompt.length())
+	Thread.sleep(100);
+
+      byte [] buf;
+      String s;
+      
+      if (inps.available() >= echo_txt.length()) {
+        buf = new byte[inps.available()];
+        inps.read(buf);
+        s = new String(buf);
+        if (s.startsWith(echo_txt)) {
+          superuser = true;
+	  logInfo("Superuser privileges found!");
+	} else {
+	  // no need to read errs
+	  superuser = false;
+	  logInfo("Superuser privileges not found.");
+	}
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (InterruptedException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Fetches the value of a property from the configuration file.
+   * 
+   *  @param key the name of the property
+   *  
+   *  @return the value of the property, if it exists and
+   *  null otherwise
+   */
+  public static String getProperty(String key) {
+    if (!ready)
+      prepare("conf/failmon.properties");
+    return fmProperties.getProperty(key);
+  }
+
+  /**
+   * Sets the value of a property in the configuration file.
+   * 
+   *  @param key the name of the property
+   *  @param value the new value for the property
+   *  
+   */
+  
+  public static void setProperty(String key, String value) {
+    fmProperties.setProperty(key, value);
+  }
+
+  /**
+   * Scans the configuration file to determine which monitoring
+   * utilities are available in the system. For each one of them, a
+   * job is created. All such jobs are scheduled and executed by
+   * Executor.
+   * 
+   * @return an ArrayList that contains the jobs to be executed by the Executor.
+   */
+  public static ArrayList<MonitorJob> getJobs() {
+
+    ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>();
+    int timeInt = 0;
+
+    // for Hadoop Log parsing
+    String [] fnames_r = getProperty("log.hadoop.filenames").split(",\\s*");
+    String tmp = getProperty("log.hadoop.enabled");
+
+    String [] fnames = expandDirs(fnames_r, ".*(.log).*");
+
+    timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp) && fnames[0] != null)
+      for (String fname : fnames) {
+        File f = new File(fname);
+        if (f.exists() && f.canRead()) {
+          monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt));
+	  logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath());
+	} else if (!f.exists())
+	  logInfo("Skipping Hadoop log file " + fname + " (file not found)");
+	else
+	  logInfo("Skipping Hadoop log file " + fname + " (permission denied)");
+    }
+    
+    
+    // for System Log parsing
+    fnames_r = getProperty("log.system.filenames").split(",\\s*");
+    tmp = getProperty("log.system.enabled");
+
+    fnames = expandDirs(fnames_r, ".*(messages).*");
+
+    timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp))
+      for (String fname : fnames) {
+        File f = new File(fname);
+        if (f.exists() && f.canRead()) {
+          monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt));
+	  logInfo("Created Monitor for System log file: " + f.getAbsolutePath());
+        } else if (!f.exists())
+	  logInfo("Skipping system log file " + fname + " (file not found)");
+	else
+	  logInfo("Skipping system log file " + fname + " (permission denied)");
+      }
+        
+
+    // for network interfaces
+    tmp = getProperty("nic.enabled");
+
+    timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      monitors.add(new MonitorJob(new NICParser(), "nics", timeInt));
+      logInfo("Created Monitor for NICs");
+    }
+
+    // for cpu
+    tmp = getProperty("cpu.enabled");
+
+    timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt));
+      logInfo("Created Monitor for CPUs");
+    }
+
+    // for disks
+    tmp = getProperty("disks.enabled");
+
+    timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      // check privileges if a disk with no disks./dev/xxx.source is found
+      boolean smart_present = checkExistence("smartctl");
+      int disks_ok = 0;
+      String devicesStr = getProperty("disks.list");
+      String[] devices = new String[0]; // empty default keeps the loops below safe when disks.list is unset
+
+      if (devicesStr != null)
+        devices = devicesStr.split(",\\s*");
+      
+      for (int i = 0; i< devices.length; i++) {
+        boolean file_present = false;
+        boolean disk_present = false;
+        
+        String fileloc = getProperty("disks." + devices[i] + ".source");
+        if (fileloc != null && fileloc.equalsIgnoreCase("true"))
+          file_present = true;
+        
+        if (!file_present) 
+          if (superuser) {
+              CharSequence sb = runCommandGeneric("sudo smartctl -i " + devices[i]);
+              String patternStr = "[(failed)(device not supported)]";
+              Pattern pattern = Pattern.compile(patternStr);
+              Matcher matcher = pattern.matcher(sb.toString());
+              if (matcher.find(0))
+                disk_present = false;
+              else
+                disk_present = true;            
+          }
+        if (file_present || (disk_present && smart_present)) {
+          disks_ok++;
+        } else
+          devices[i] = null;
+      } 
+      
+      // now remove disks that dont exist
+      StringBuilder resetSB = new StringBuilder();
+      for (int j = 0; j < devices.length; j++) {
+        resetSB.append(devices[j] == null ? "" : devices[j] + ", ");
+	if (devices[j] != null)
+	    logInfo("Found S.M.A.R.T. attributes for disk " + devices[j]);
+      }
+      // fix the property
+      if (resetSB.length() >= 2)
+        setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2));
+      
+      if (disks_ok > 0) {
+        monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt));
+	logInfo("Created Monitor for S.M.A.R.T disk attributes");
+      }
+    }
+
+    // for lm-sensors
+    tmp = getProperty("sensors.enabled");
+
+    timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) {
+      monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt));
+      logInfo("Created Monitor for lm-sensors output");
+    }
+
+    return monitors;
+  }
+
+  /**
+   * Determines the minimum interval at which the executor thread
+   * needs to wake up to execute jobs. Essentially, this interval
+   * equals the GCD of the intervals of all scheduled jobs.
+   * 
+   *  @param monitors the list of scheduled jobs
+   *  
+   *  @return the minimum interval between two scheduled jobs
+   */
+  public static int getInterval(ArrayList<MonitorJob> monitors) {
+    String tmp = getProperty("executor.interval.min");
+    if (tmp != null)
+      MIN_INTERVAL = Integer.parseInt(tmp);
+
+    int[] monIntervals = new int[monitors.size()];
+
+    for (int i = 0; i < monitors.size(); i++)
+      monIntervals[i] = monitors.get(i).interval;
+
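+    // e.g. job intervals of 10, 15 and 20 seconds give gcd = 5, so the thread wakes up every 5 seconds (the result is never smaller than MIN_INTERVAL)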
+    return Math.max(MIN_INTERVAL, gcd(monIntervals));
+  }
+
+  /**
+   * Checks whether a specific shell command is available
+   * in the system. 
+   * 
+   *  @param cmd the command to check against
+   *
+   *  @return true, if the command is available, false otherwise
+   */
+  public static boolean checkExistence(String cmd) {
+    CharSequence sb = runCommandGeneric("which " + cmd);
+    if (sb.length() > 1)
+      return true;
+
+    return false;
+  }
+
+  /**
+   * Runs a shell command in the system and provides a StringBuffer
+   * with the output of the command.
+   * <p>This method is deprecated. See the related method that returns a CharSequence as opposed to a StringBuffer.
+   * 
+   *  @param cmd an array of string that form the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   *  @see #runCommandGeneric(String[])
+   *  @deprecated
+   */
+  public static StringBuffer runCommand(String[] cmd) {
+    return new StringBuffer(runCommandGeneric(cmd));
+  }
+
+  /**
+   * Runs a shell command in the system and provides a CharSequence
+   * with the output of the command.
+   * 
+   *  @param cmd an array of string that form the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   */
+  public static CharSequence runCommandGeneric(String[] cmd) {
+    StringBuilder retval = new StringBuilder(MAX_OUTPUT_LENGTH);
+    Process p;
+    try {
+      p = Runtime.getRuntime().exec(cmd);
+      InputStream tmp = p.getInputStream();
+      p.waitFor();
+      int c;
+      while ((c = tmp.read()) != -1)
+        retval.append((char) c);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (InterruptedException e) {
+      e.printStackTrace();
+    }
+
+    return retval;
+  }
+  
+  /**
+   * Runs a shell command in the system and provides a StringBuffer
+   * with the output of the command.
+   * <p>This method is deprecated in favor of the one that returns a CharSequence as opposed to a StringBuffer.
+   *  @param cmd the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   *  @see #runCommandGeneric(String)
+   *  @deprecated
+   */
+  public static StringBuffer runCommand(String cmd) {
+    return new StringBuffer(runCommandGeneric(cmd));
+  }
+
+  /**
+   * Runs a shell command in the system and provides a CharSequence
+   * with the output of the command.
+   * 
+   *  @param cmd the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   */
+  public static CharSequence runCommandGeneric(String cmd) {
+    return runCommandGeneric(cmd.split("\\s+"));
+  }
+
+  /**
+   * Determines the greatest common divisor (GCD) of two integers.
+   * 
+   *  @param m the first integer
+   *  @param n the second integer
+   *  
+   *  @return the greatest common divisor of m and n
+   */
+  public static int gcd(int m, int n) {
+    if (m == 0 && n == 0)
+      return 0;
+    if (m < n) {
+      int t = m;
+      m = n;
+      n = t;
+    }
+    int r = m % n;
+    if (r == 0) {
+      return n;
+    } else {
+      return gcd(n, r);
+    }
+  }
+
+  /**
+   * Determines the greatest common divisor (GCD) of a list
+   * of integers.
+   * 
+   *  @param numbers the list of integers to process
+   *  
+   *  @return the greatest common divisor of all numbers
+   */
+  public static int gcd(int[] numbers) {
+
+    if (numbers.length == 1)
+      return numbers[0];
+
+    int g = gcd(numbers[0], numbers[1]);
+
+    for (int i = 2; i < numbers.length; i++)
+      g = gcd(g, numbers[i]);
+
+    return g;
+  }
+
+  private static String [] expandDirs(String [] input, String patternStr) {
+
+    ArrayList<String> fnames = new ArrayList<String>();
+    Pattern pattern = Pattern.compile(patternStr);
+    Matcher matcher;
+    File f;
+    
+    for (String fname : input) {
+      f = new File(fname);
+      if (f.exists()) {
+	if (f.isDirectory()) {
+	  // add all matching files
+	  File [] fcs = f.listFiles();
+	  for (File fc : fcs) {
+	    matcher = pattern.matcher(fc.getName());
+	    if (matcher.find() && fc.isFile())
+	      fnames.add(fc.getAbsolutePath());
+	  }
+	} else {
+	  // normal file, just add to output
+	  fnames.add(f.getAbsolutePath());
+	}
+      }
+    }
+    return fnames.toArray(input);
+  }
+
+  private static int setValue(String propname, int defaultValue) {
+
+    String v = getProperty(propname);
+
+    if (v != null)
+      return Integer.parseInt(v);
+    else
+      return defaultValue;
+  }
+
+  
+  public static void logInfo(String str) {
+    LOG.info(str);
+  }
+}

+ 151 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/EventRecord.java

@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.HashMap;
+
+/**********************************************************
+ * Objects of this class represent metrics collected for 
+ * a specific hardware source. Each EventRecord contains a HashMap of 
+ * (key, value) pairs, each of which represents a property of
+ * the metered value. For instance, when parsing a log file, an
+ * EventRecord is created for each log entry, which contains 
+ * the hostname and the ip addresses of the node, timestamp of
+ * the log entry, the actual message etc. Each and every EventRecord
+ * contains the hostname of the machine on which it was collected,
+ * its IP address and the time of collection.
+ * 
+ * The main purpose of this class is to provide a uniform format
+ * for records collected from various system components (logs,
+ * ifconfig, smartmontools, lm-sensors etc). All metric values are 
+ * converted into this format after they are collected by a
+ * Monitored object.
+ *
+ **********************************************************/
+
+public class EventRecord {
+
+  HashMap<String, Object> fields;
+
+  /**
+   * Create the EventRecord given the most common properties
+   * among different metric types.
+   */
+  public EventRecord(String _hostname, Object [] _ips, Calendar _timestamp,
+      String _type, String _logLevel, String _source, String _message) {
+    fields = new HashMap<String, Object>();
+    fields.clear();
+    set("hostname", _hostname);
+    set("ips", _ips);
+    set("timestamp", _timestamp);
+    set("type", _type);
+    set("logLevel", _logLevel);
+    set("source", _source);
+    set("message", _message);
+  }
+
+  /**
+   * Create the EventRecord with no fields other than "invalid" as
+   * the hostname. This is only used as a dummy.
+   */
+  public EventRecord() {
+    // creates an invalid record
+    fields = new HashMap<String, Object>();
+    fields.clear();
+    set("hostname", "invalid");
+  }
+
+  /**
+   * Return the HashMap of properties of the EventRecord.
+   * 
+   * @return a HashMap that contains all properties of the record.
+   */
+  public final HashMap<String, Object> getMap() {
+    return fields;
+  }
+
+  /**
+   * Set the value of a property of the EventRecord.
+   * 
+   * @param fieldName the name of the property to set
+   * @param fieldValue the value of the property to set
+   * 
+   */
+  public void set(String fieldName, Object fieldValue) {
+    if (fieldValue != null)
+      fields.put(fieldName, fieldValue);
+  }
+
+  /**
+   * Get the value of a property of the EventRecord.
+   * If the property with the specific key is not found,
+   * null is returned.
+   * 
+   * @param fieldName the name of the property to get.
+   */
+  public Object get(String fieldName) {
+    return fields.get(fieldName);
+  }
+
+  /**
+   * Check if the EventRecord is a valid one, i.e., whether
+   * it represents meaningful metric values.
+   * 
+   * @return true if the EventRecord is a valid one, false otherwise.
+   */
+  public boolean isValid() {
+    return !("invalid".equalsIgnoreCase((String) fields.get("hostname")));
+  }
+
+  /**
+   * Creates and returns a string representation of the object.
+   * 
+   * @return a String representation of the object
+   */
+
+  public String toString() {
+    String retval = "";
+    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
+    Collections.sort(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      Object value = fields.get(keys.get(i));
+      if (value == null)
+        retval += keys.get(i) + ":\tnull\n";
+      else if (value instanceof String)
+        retval += keys.get(i) + ":\t" + value + "\n";
+      else if (value instanceof Calendar)
+        retval += keys.get(i) + ":\t" + ((Calendar) value).getTime() + "\n";
+      else if (value instanceof InetAddress[]) {
+        retval += "Known IPs:\t";
+        for (InetAddress ip : ((InetAddress[]) value))
+          retval += ip.getHostAddress() + " ";
+        retval += "\n";
+      } else if (value instanceof String[]) {
+        // ips may also be stored as plain strings (e.g. by the log parsers)
+        retval += "Known IPs:\t";
+        for (String ip : ((String[]) value))
+          retval += ip + " ";
+        retval += "\n";
+      } else {
+        retval += keys.get(i) + ":\t" + value.toString() + "\n";
+      }
+    }
+    return retval;
+  }
+
+}

+ 120 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Executor.java

@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**********************************************************
+ * This class executes monitoring jobs on all nodes of the
+ * cluster, on which we intend to gather failure metrics. 
+ * It is basically a thread that sleeps and periodically wakes
+ * up to execute monitoring jobs and ship all gathered data to 
+ * a "safe" location, which in most cases will be the HDFS 
+ * filesystem of the monitored cluster.
+ * 
+ **********************************************************/
+
+public class Executor implements Runnable {
+
+  public static final int DEFAULT_LOG_INTERVAL = 3600;
+
+  public static final int DEFAULT_POLL_INTERVAL = 360;
+
+  public static int MIN_INTERVAL = 5;
+
+  public static int instances = 0;
+
+  LocalStore lstore;
+
+  ArrayList<MonitorJob> monitors;
+  
+  int interval;
+
+  int upload_interval;
+  int upload_counter;
+  
+  /**
+   * Create an instance of the class and read the configuration
+   * file to determine the set of jobs that will be run and the 
+   * maximum interval for which the thread can sleep before it 
+   * wakes up to execute a monitoring job on the node.
+   * 
+   */ 
+
+  public Executor(Configuration conf) {
+    
+    Environment.prepare("conf/failmon.properties");
+    
+    String localTmpDir;
+    
+    if (conf == null) {
+      // running as a stand-alone application
+      localTmpDir = System.getProperty("java.io.tmpdir");
+      Environment.setProperty("local.tmp.dir", localTmpDir);
+    } else {
+      // running from within Hadoop
+      localTmpDir = conf.get("hadoop.tmp.dir");
+      String hadoopLogPath = System.getProperty("hadoop.log.dir") + "/" + System.getProperty("hadoop.log.file");
+      Environment.setProperty("hadoop.log.file", hadoopLogPath);
+      Environment.setProperty("local.tmp.dir", localTmpDir);
+    }
+    
+    monitors = Environment.getJobs();
+    interval = Environment.getInterval(monitors);
+    upload_interval = LocalStore.UPLOAD_INTERVAL;
+    lstore = new LocalStore();
+    
+    if (Environment.getProperty("local.upload.interval") != null) 
+     upload_interval = Integer.parseInt(Environment.getProperty("local.upload.interval"));
+
+    instances++;
+  }
+
+  public void run() {
+    upload_counter = upload_interval;
+
+    Environment.logInfo("Failmon Executor thread started successfully.");
+    while (true) {
+      try {
+        Thread.sleep(interval * 1000);
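+        // each job keeps a countdown in seconds; subtract the sleep interval and fire the job when its countdown reaches zero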
+        for (int i = 0; i < monitors.size(); i++) {
+          monitors.get(i).counter -= interval;
+          if (monitors.get(i).counter <= 0) {
+            monitors.get(i).reset();
+            Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
+            monitors.get(i).job.monitor(lstore);
+          }
+        }
+        upload_counter -= interval;
+        if (upload_counter <= 0) {
+          lstore.upload();
+          upload_counter = upload_interval;
+        }
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+    }
+  }
+
+  public void cleanup() {
+    instances--;   
+  }
+}

+ 154 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HDFSMerger.java

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedOutputStream;
+import java.io.InputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedOutputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+
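+/**********************************************************
+ * This class concatenates all FailMon data files found in the
+ * HDFS upload directory into a single merge-<timestamp>.dat
+ * file and deletes the original files afterwards.
+ **********************************************************/
+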
+public class HDFSMerger {
+
+  Configuration hadoopConf;
+  FileSystem hdfs;
+  
+  String hdfsDir;
+  
+  FileStatus [] inputFiles;
+
+  Path outputFilePath;
+  FSDataOutputStream outputFile;
+    
+  boolean compress;
+
+  FileWriter fw;
+
+  BufferedWriter writer;
+
+  public HDFSMerger() throws IOException {
+
+    String hadoopConfPath; 
+
+    if (Environment.getProperty("hadoop.conf.path") == null)
+      hadoopConfPath = "../../../conf";
+    else
+      hadoopConfPath = Environment.getProperty("hadoop.conf.path");
+
+    // Read the configuration for the Hadoop environment
+    hadoopConf = new Configuration();
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
+    
+    // determine the local output file name
+    if (Environment.getProperty("local.tmp.filename") == null)
+      Environment.setProperty("local.tmp.filename", "failmon.dat");
+    
+    // determine the upload location
+    hdfsDir = Environment.getProperty("hdfs.upload.dir");
+    if (hdfsDir == null)
+      hdfsDir = "/failmon";
+
+    hdfs = FileSystem.get(hadoopConf);
+    
+    Path hdfsDirPath = new Path(hadoopConf.get("fs.default.name") + hdfsDir);
+
+    try {
+      if (!hdfs.getFileStatus(hdfsDirPath).isDir()) {
+	Environment.logInfo("HDFSMerger: Not an HDFS directory: " + hdfsDirPath.toString());
+	System.exit(0);
+      }
+    } catch (FileNotFoundException e) {
+      Environment.logInfo("HDFSMerger: Directory not found: " + hdfsDirPath.toString());
+    }
+
+    inputFiles = hdfs.listStatus(hdfsDirPath);
+
+    outputFilePath = new Path(hdfsDirPath.toString() + "/" + "merge-"
+			  + Calendar.getInstance().getTimeInMillis() + ".dat");
+    outputFile = hdfs.create(outputFilePath);
+    
+    for (FileStatus fstatus : inputFiles) {
+      appendFile(fstatus.getPath());
+      hdfs.delete(fstatus.getPath(), true);
+    }
+
+    outputFile.close();
+
+    Environment.logInfo("HDFS file merging complete!");
+  }
+
+  private void appendFile (Path inputPath) throws IOException {
+    
+    FSDataInputStream anyInputFile = hdfs.open(inputPath);
+    InputStream inputFile;
+    byte buffer[] = new byte[4096];
+    
+    if (inputPath.toString().endsWith(LocalStore.COMPRESSION_SUFFIX)) {
+      // the file is compressed
+      inputFile = new ZipInputStream(anyInputFile);
+      ((ZipInputStream) inputFile).getNextEntry();
+    } else {
+      inputFile = anyInputFile;
+    }
+    
+    try {
+      int bytesRead = 0;
+      while ((bytesRead = inputFile.read(buffer)) > 0) {
+	outputFile.write(buffer, 0, bytesRead);
+      }
+    } catch (IOException e) {
+      Environment.logInfo("Error while copying file:" + inputPath.toString());
+    } finally {
+      inputFile.close();
+    }    
+  }
+
+  
+  public static void main(String [] args) {
+
+    Environment.prepare("./conf/failmon.properties");
+
+    try {
+      new HDFSMerger();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+  }
+}

+ 136 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HadoopLogParser.java

@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * An object of this class parses a Hadoop log file to create
+ * appropriate EventRecords. The log file can either be the log 
+ * of a NameNode or JobTracker or DataNode or TaskTracker.
+ * 
+ **********************************************************/
+
+public class HadoopLogParser extends LogParser {
+
+  /**
+   * Create a new parser object and try to find the hostname
+   * of the node that generated the log
+   */
+  public HadoopLogParser(String fname) {
+    super(fname);
+    if ((dateformat = Environment.getProperty("log.hadoop.dateformat")) == null)
+      dateformat = "\\d{4}-\\d{2}-\\d{2}";
+    if ((timeformat = Environment.getProperty("log.hadoop.timeformat")) == null)
+      timeformat = "\\d{2}:\\d{2}:\\d{2}";
+    findHostname();
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  public EventRecord parseLine(String line) throws IOException {
+    EventRecord retval = null;
+
+    if (line != null) {
+      // process line
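+      // a matching entry typically looks like: "2008-06-10 14:07:23,845 INFO org.apache.hadoop.dfs.DataNode: <message>" (illustrative)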
+      String patternStr = "(" + dateformat + ")";
+      patternStr += "\\s+";
+      patternStr += "(" + timeformat + ")";
+      patternStr += ".{4}\\s(\\w*)\\s"; // for logLevel
+      patternStr += "\\s*([\\w+\\.?]+)"; // for source
+      patternStr += ":\\s+(.+)"; // for the message
+      Pattern pattern = Pattern.compile(patternStr);
+      Matcher matcher = pattern.matcher(line);
+
+      if (matcher.find(0) && matcher.groupCount() >= 5) {
+        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
+            matcher.group(2)),
+	    "HadoopLog",
+	    matcher.group(3), // loglevel
+            matcher.group(4), // source
+            matcher.group(5)); // message
+      } else {
+        retval = new EventRecord();
+      }
+    }
+
+    return retval;
+  }
+
+  /**
+   * Parse a date found in the Hadoop log.
+   * 
+   * @return a Calendar representing the date
+   */
+  protected Calendar parseDate(String strDate, String strTime) {
+    Calendar retval = Calendar.getInstance();
+    // set date
+    String[] fields = strDate.split("-");
+    retval.set(Calendar.YEAR, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MONTH, Integer.parseInt(fields[1]) - 1); // Calendar.MONTH is zero-based
+    retval.set(Calendar.DATE, Integer.parseInt(fields[2]));
+    // set time
+    fields = strTime.split(":");
+    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
+    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
+    return retval;
+  }
+
+  /**
+   * Attempt to determine the hostname of the node that created the
+   * log file. This information can be found in the STARTUP_MSG lines 
+   * of the Hadoop log, which are emitted when the node starts.
+   * 
+   */
+  private void findHostname() {
+    String startupInfo = Environment.runCommandGeneric(
+        "grep --max-count=1 STARTUP_MSG:\\s*host " + file.getName()).toString();
+    Pattern pattern = Pattern.compile("\\s+(\\w+/.+)\\s+");
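+    // the matching STARTUP_MSG line typically reads "STARTUP_MSG:   host = node01/10.0.0.1", giving the hostname and IP (illustrative)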
+    Matcher matcher = pattern.matcher(startupInfo);
+    if (matcher.find(0)) {
+      hostname = matcher.group(1).split("/")[0];
+      ips = new String[1];
+      ips[0] = matcher.group(1).split("/")[1];
+    }
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("Hadoop Log Parser for file: " + file.getName());
+  }
+
+}

+ 282 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LocalStore.java

@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedOutputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**********************************************************
+ * This class takes care of the temporary local storage of 
+ * gathered metrics before they get uploaded into HDFS. It writes 
+ * Serialized Records as lines in a temporary file and then 
+ * compresses and uploads it into HDFS.
+ * 
+ **********************************************************/
+
+public class LocalStore {
+
+  public final static char FIELD_SEPARATOR = '|';
+
+  public final static char RECORD_SEPARATOR = '\n';
+
+  public final static String COMPRESSION_SUFFIX = ".zip";
+
+  public final static int UPLOAD_INTERVAL = 600;
+
+  String filename;
+  String hdfsDir;
+
+  boolean compress;
+
+  FileWriter fw;
+
+  BufferedWriter writer;
+
+  /**
+   * Create an instance of the class and read the configuration
+   * file to determine some output parameters. Then, initialize the
+   * structures needed for the buffered I/O (so that small appends
+   * can be handled efficiently).
+   * 
+   */ 
+
+  public LocalStore() {
+    // determine the local output file name
+    if (Environment.getProperty("local.tmp.filename") == null)
+      Environment.setProperty("local.tmp.filename", "failmon.dat");
+    
+    // local.tmp.dir has been set by the Executor
+    if (Environment.getProperty("local.tmp.dir") == null)
+      Environment.setProperty("local.tmp.dir", System.getProperty("java.io.tmpdir"));
+    
+    filename = Environment.getProperty("local.tmp.dir") + "/" +
+      Environment.getProperty("local.tmp.filename");
+
+    // determine the upload location
+    hdfsDir = Environment.getProperty("hdfs.upload.dir");
+    if (hdfsDir == null)
+      hdfsDir = "/failmon";
+
+    // determine if compression is enabled
+    compress = true;
+    if ("false".equalsIgnoreCase(Environment
+        .getProperty("local.tmp.compression")))
+      compress = false;
+
+    try {
+      fw = new FileWriter(filename, true);
+      writer = new BufferedWriter(fw);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Insert an EventRecord to the local storage, after it
+   * gets serialized and anonymized.
+   * 
+   * @param er the EventRecord to be inserted
+   */ 
+  
+  public void insert(EventRecord er) {
+    SerializedRecord sr = new SerializedRecord(er);
+    try {
+      Anonymizer.anonymize(sr);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+    append(sr);
+  }
+
+  /**
+   * Insert an array of EventRecords to the local storage, after they
+   * get serialized and anonymized.
+   * 
+   * @param ers the array of EventRecords to be inserted
+   */
+  public void insert(EventRecord[] ers) {
+    for (EventRecord er : ers)
+      insert(er);
+  }
+
+  private void append(SerializedRecord sr) {
+    try {
+      writer.write(pack(sr).toString());
+      writer.write(RECORD_SEPARATOR);
+      // writer.flush();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Pack a SerializedRecord into a character buffer.
+   * <p>
+   * This method is deprecated.
+   * @param sr the SerializedRecord to be packed
+   * @return packed representation of the SerializedRecord
+   * @see #packConcurrent(SerializedRecord)
+   * @deprecated
+   */
+  public static StringBuffer pack(SerializedRecord sr) {
+    return new StringBuffer(packConcurrent(sr));
+  }
+
+  /**
+   * Pack a SerializedRecord into a character buffer.
+   *
+   * @param sr the SerializedRecord to be packed
+   * @return packed representation of the SerializedRecord
+   */
+  public static CharSequence packConcurrent(SerializedRecord sr) {
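+    // produces key:value pairs joined by FIELD_SEPARATOR, e.g. "hostname:node01|ips:10.0.0.1|...|message:<text>|" (illustrative values and order)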
+    StringBuilder sb = new StringBuilder();
+
+    ArrayList<String> keys = new ArrayList<String>(sr.fields.keySet());
+
+    if (sr.isValid())
+      SerializedRecord.arrangeKeys(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      String value = sr.fields.get(keys.get(i));
+      sb.append(keys.get(i) + ":" + value);
+      sb.append(FIELD_SEPARATOR);
+    }
+    return sb;
+  }
+  
+  /**
+   * Upload the local file store into HDFS, after
+   * compressing it. Then a new local file is created
+   * as a temporary record store.
+   * 
+   */
+  public void upload() {
+    try {
+      writer.flush();
+      if (compress)
+        zipCompress(filename);
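+      // the remote file name becomes failmon-<hostname or MD5(hostname)>-<epoch millis>, plus the compression suffix when zipped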
+      String remoteName = "failmon-";
+      if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.hostnames")))
+        remoteName += Anonymizer.getMD5Hash(InetAddress.getLocalHost().getCanonicalHostName()) + "-";
+      else
+        remoteName += InetAddress.getLocalHost().getCanonicalHostName() + "-"; 
+      remoteName += Calendar.getInstance().getTimeInMillis();
+      if (compress)
+	copyToHDFS(filename + COMPRESSION_SUFFIX, hdfsDir + "/" + remoteName + COMPRESSION_SUFFIX);
+      else
+	copyToHDFS(filename, hdfsDir + "/" + remoteName);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    // delete and re-open
+    try {
+      fw.close();
+      fw = new FileWriter(filename);
+      writer = new BufferedWriter(fw);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+  
+  /**
+   * Compress a text file using the ZIP compressing algorithm.
+   * 
+   * @param filename the path to the file to be compressed
+   */
+  public static void zipCompress(String filename) throws IOException {
+    FileOutputStream fos = new FileOutputStream(filename + COMPRESSION_SUFFIX);
+    CheckedOutputStream csum = new CheckedOutputStream(fos, new CRC32());
+    ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(csum));
+    out.setComment("Failmon records.");
+
+    BufferedReader in = new BufferedReader(new FileReader(filename));
+    out.putNextEntry(new ZipEntry(new File(filename).getName()));
+    int c;
+    while ((c = in.read()) != -1)
+      out.write(c);
+    in.close();
+
+    out.finish();
+    out.close();
+  }
+
+  /**
+   * Copy a local file to HDFS
+   * 
+   * @param localFile the filename of the local file
+   * @param hdfsFile the HDFS filename to copy to
+   */
+  public static void copyToHDFS(String localFile, String hdfsFile) throws IOException {
+
+    String hadoopConfPath; 
+
+    if (Environment.getProperty("hadoop.conf.path") == null)
+      hadoopConfPath = "../../../conf";
+    else
+      hadoopConfPath = Environment.getProperty("hadoop.conf.path");
+
+    // Read the configuration for the Hadoop environment
+    Configuration hadoopConf = new Configuration();
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
+
+    // System.out.println(hadoopConf.get("hadoop.tmp.dir"));
+    // System.out.println(hadoopConf.get("fs.default.name"));
+    FileSystem fs = FileSystem.get(hadoopConf);
+
+    // HadoopDFS deals with Path
+    Path inFile = new Path("file://" + localFile);
+    Path outFile = new Path(hadoopConf.get("fs.default.name") + hdfsFile);
+
+     // Read from and write to new file
+    Environment.logInfo("Uploading to HDFS (file " + outFile + ") ...");
+    fs.copyFromLocalFile(false, inFile, outFile);
+  }
+
+  /**
+   * Close the temporary local file
+   * 
+   */ 
+  public void close() {
+    try {
+      writer.flush();
+      writer.close();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+}
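
A minimal usage sketch of LocalStore, assuming failmon.properties and the Hadoop configuration are reachable as Environment.prepare() and upload() expect; the class name and record contents below are illustrative, not part of the patch:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class LocalStoreExample {
      public static void main(String[] args) throws Exception {
        Environment.prepare("conf/failmon.properties");
        LocalStore store = new LocalStore();

        // Same 7-argument EventRecord constructor the parsers in this patch use.
        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "manual", "hello failmon");

        store.insert(er);   // serialize, anonymize and append to the local file
        store.upload();     // zip-compress (if enabled) and copy the file to HDFS
        store.close();
      }
    }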

+ 214 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LogParser.java

@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Calendar;
+
+/**********************************************************
+ * This class represents objects that provide log parsing 
+ * functionality. Typically, such objects read log files line
+ * by line and for each log entry they identify, they create a 
+ * corresponding EventRecord. In this way, disparate log files
+ * can be merged using the uniform format of EventRecords and can,
+ * thus, be processed in a uniform way.
+ * 
+ **********************************************************/
+
+public abstract class LogParser implements Monitored {
+
+  File file;
+
+  BufferedReader reader;
+
+  String hostname;
+
+  Object [] ips;
+
+  String dateformat;
+
+  String timeformat;
+
+  private String firstLine;
+  private long offset;
+
+  /**
+   * Create a parser that will read from the specified log file.
+   * 
+   * @param fname the filename of the log file to be read
+   */
+  public LogParser(String fname) {
+    file = new File(fname);
+
+    ParseState ps = PersistentState.getState(file.getAbsolutePath());
+    firstLine = ps.firstLine;
+    offset = ps.offset;
+    
+    try {
+      reader = new BufferedReader(new FileReader(file));
+      checkForRotation();
+      Environment.logInfo("Checked for rotation...");
+      reader.skip(offset);
+    } catch (FileNotFoundException e) {
+      System.err.println(e.getMessage());
+      e.printStackTrace();
+    } catch (IOException e) {
+      System.err.println(e.getMessage());
+      e.printStackTrace();
+    }
+
+    setNetworkProperties();
+  }
+
+  protected void setNetworkProperties() {
+    // determine hostname and ip addresses for the node
+    try {
+      // Get hostname
+      hostname = InetAddress.getLocalHost().getCanonicalHostName();
+      // Get all associated ip addresses
+      ips = InetAddress.getAllByName(hostname);
+
+    } catch (UnknownHostException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Insert all EventRecords that can be extracted from
+   * the monitored log file into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls) {
+    int in = 0;
+    EventRecord er = null;
+    Environment.logInfo("Started processing log...");
+
+    while ((er = getNext()) != null) {
+      // Environment.logInfo("Processing log line:\t" + in++);
+      if (er.isValid()) {
+        ls.insert(er);
+      }
+    }
+
+    PersistentState.updateState(file.getAbsolutePath(), firstLine, offset);
+    PersistentState.writeState("conf/parsing.state");
+  }
+
+  /**
+   * Get an array of all EventRecords that can be extracted from
+   * the monitored log file.
+   * 
+   * @return The array of EventRecords
+   */
+  public EventRecord[] monitor() {
+
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+    EventRecord er;
+
+    while ((er = getNext()) != null)
+      recs.add(er);
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+
+  /**
+   * Continue parsing the log file until a valid log entry is identified.
+   * When one such entry is found, parse it and return a corresponding EventRecord.
+   * 
+   *  
+   * @return The EventRecord corresponding to the next log entry
+   */
+  public EventRecord getNext() {
+    try {
+      String line = reader.readLine();
+      if (line != null) {
+        if (firstLine == null)
+          firstLine = new String(line);
+        offset += line.length() + 1;
+        return parseLine(line);
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * Return the BufferedReader that reads the log file
+   *  
+   * @return The BufferedReader that reads the log file
+   */
+  public BufferedReader getReader() {
+    return reader;
+  }
+
+  /**
+   * Check whether the log file has been rotated. If so,
+   * start reading the file from the beginning.
+   *  
+   */
+  public void checkForRotation() {
+    try {
+      BufferedReader probe = new BufferedReader(new FileReader(file.getAbsoluteFile()));
+      String probedFirstLine = probe.readLine();
+      probe.close();
+      if (firstLine == null || (!firstLine.equals(probedFirstLine))) {
+        // the file was rotated; start reading it from the beginning
+        reader.close();
+        reader = new BufferedReader(new FileReader(file.getAbsoluteFile()));
+        firstLine = null;
+        offset = 0;
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  abstract public EventRecord parseLine(String line) throws IOException;
+
+  /**
+   * Parse a date found in Hadoop log file.
+   * 
+   * @return a Calendar representing the date
+   */
+  abstract protected Calendar parseDate(String strDate, String strTime);
+
+}
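
A minimal sketch of a concrete LogParser subclass, showing the two abstract methods (parseLine, parseDate) plus getInfo() that an implementation must provide; the class name and parsing logic are illustrative, unlike the real HadoopLogParser and SystemLogParser in this patch:

    package org.apache.hadoop.contrib.failmon;

    import java.io.IOException;
    import java.util.Calendar;

    public class SingleLineParser extends LogParser {

      public SingleLineParser(String fname) {
        super(fname);
      }

      // Treat every non-empty line as one "Unknown" event; blank lines are invalid.
      public EventRecord parseLine(String line) throws IOException {
        if (line == null || line.trim().length() == 0)
          return new EventRecord();   // isValid() == false
        return new EventRecord(hostname, ips, Calendar.getInstance(),
            "ExampleLog", "Unknown", "Unknown", line);
      }

      protected Calendar parseDate(String strDate, String strTime) {
        return Calendar.getInstance();   // no real date parsing in this sketch
      }

      public String getInfo() {
        return "example parser for file: " + file.getAbsoluteFile();
      }
    }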

+ 43 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/MonitorJob.java

@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+/**********************************************************
+ * This class is a wrapper for a monitoring job. 
+ * 
+ **********************************************************/
+
+public class MonitorJob {
+  Monitored job;
+
+  String type;
+  int interval;
+  int counter;
+
+  public MonitorJob(Monitored _job, String _type, int _interval) {
+    job = _job;
+    type = _type;
+    interval = _interval;
+    counter = _interval;
+  }
+
+  public void reset() {
+    counter = interval;
+  }
+}

+ 53 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Monitored.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+/**********************************************************
+ * Represents objects that monitor specific hardware resources and
+ * can query them to get EventRecords describing the state of these
+ * resources.
+ *
+ **********************************************************/
+
+public interface Monitored {
+  /**
+   * Get an array of all EventRecords that can be extracted for
+   * the represented hardware component.
+   * 
+   * @return The array of EventRecords
+   */
+  public EventRecord[] monitor();
+  
+  /**
+   * Inserts all EventRecords that can be extracted for
+   * the represented hardware component into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls);
+  
+  /**
+   * Return a String with information about the implementing
+   * class 
+   * 
+   * @return A String describing the implementing class
+   */
+  public String getInfo();
+}

+ 140 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/NICParser.java

@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Calendar;
+
+/**********************************************************
+ * Objects of this class parse the output of ifconfig to 
+ * gather information about present Network Interface Cards
+ * in the system. The list of NICs to poll is specified in the 
+ * configuration file.
+ * 
+ **********************************************************/
+
+
+public class NICParser extends ShellParser {
+
+  String[] nics;
+
+  /**
+   * Constructs a NICParser and reads the list of NICs to query
+   */
+  public NICParser() {
+    super();
+    nics = Environment.getProperty("nic.list").split(",\\s*");
+  }
+
+  /**
+   * Reads and parses the output of ifconfig for a specified NIC and 
+   * creates an appropriate EventRecord that holds the desirable 
+   * information for it.
+   * 
+   * @param device the NIC device name to query
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String device) throws UnknownHostException {
+    CharSequence sb = Environment.runCommandGeneric("/sbin/ifconfig " + device);
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "NIC", "Unknown", device, "-");
+
+    retval.set("hwAddress", findPattern("HWaddr\\s*([\\S{2}:]{17})", sb
+        .toString(), 1));
+
+    retval.set("ipAddress", findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb
+        .toString(), 1));
+
+    String tmp = findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb.toString(), 1);
+    retval.set("status", (tmp == null) ? "DOWN" : "UP");
+    if (tmp != null)
+      retval.set("ipAddress", tmp);
+
+    retval.set("rxPackets", findPattern("RX\\s*packets\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxErrors", findPattern("RX.+errors\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxDropped", findPattern("RX.+dropped\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxOverruns", findPattern("RX.+overruns\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxFrame", findPattern("RX.+frame\\s*:\\s*(\\d+)",
+        sb.toString(), 1));
+
+    retval.set("txPackets", findPattern("TX\\s*packets\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txErrors", findPattern("TX.+errors\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txDropped", findPattern("TX.+dropped\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txOverruns", findPattern("TX.+overruns\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txCarrier", findPattern("TX.+carrier\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    retval.set("collisions", findPattern("\\s+collisions\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    retval.set("rxBytes", findPattern("RX\\s*bytes\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txBytes", findPattern("TX\\s*bytes\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    return retval;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors for 
+   * each one of the NICs specified in the configuration. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of network interfaces.
+   */
+  public EventRecord[] monitor() {
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+
+    for (String nic : nics) {
+      try {
+        recs.add(query(nic));
+      } catch (UnknownHostException e) {
+        e.printStackTrace();
+      }
+    }
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    String retval = "ifconfig parser for interfaces: ";
+    for (String nic : nics)
+      retval += nic + " ";
+    return retval;
+  }
+}

+ 132 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/OfflineAnonymizer.java

@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+
+/**********************************************************
+ * This class can be used to anonymize logs independently of
+ * Hadoop and the Executor. It parses the specified log file to
+ * create log records for it and then passes them to the Anonymizer.
+ * After they are anonymized, they are written to a local file,
+ * which is then compressed and stored locally.
+ * 
+ **********************************************************/
+
+public class OfflineAnonymizer {
+
+  public enum LogType {
+    HADOOP, SYSTEM
+  };
+
+  LogType logtype;
+
+  File logfile;
+
+  LogParser parser;
+
+  /**
+   * Creates an OfflineAnonymizer for a specific log file.
+   * 
+   * @param logtype the type of the log file. This can either be
+   * LogFile.HADOOP or LogFile.SYSTEM
+   * @param filename the path to the log file
+   * 
+   */  
+  public OfflineAnonymizer(LogType logtype, String filename) {
+
+    logfile = new File(filename);
+
+    if (!logfile.exists()) {
+      System.err.println("Input file does not exist!");
+      System.exit(0);
+    }
+
+    if (logtype == LogType.HADOOP)
+      parser = new HadoopLogParser(filename);
+    else
+      parser = new SystemLogParser(filename);
+  }
+
+  /**
+   * Performs anonymization for the log file. Log entries are
+   * read one by one and EventRecords are created, which are then
+   * anonymized and written to the output.
+   * 
+   */
+  public void anonymize() throws Exception {
+    EventRecord er = null;
+    SerializedRecord sr = null;
+
+    BufferedWriter bfw = new BufferedWriter(new FileWriter(logfile.getName()
+        + ".anonymized"));
+
+    System.out.println("Anonymizing log records...");
+    while ((er = parser.getNext()) != null) {
+      if (er.isValid()) {
+        sr = new SerializedRecord(er);
+        Anonymizer.anonymize(sr);
+        bfw.write(LocalStore.pack(sr).toString());
+        bfw.write(LocalStore.RECORD_SEPARATOR);
+      }
+    }
+    bfw.flush();
+    bfw.close();
+    System.out.println("Anonymized log records written to " + logfile.getName()
+        + ".anonymized");
+
+    System.out.println("Compressing output file...");
+    LocalStore.zipCompress(logfile.getName() + ".anonymized");
+    System.out.println("Compressed output file written to " + logfile.getName()
+        + ".anonymized" + LocalStore.COMPRESSION_SUFFIX);
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 2) {
+      System.out.println("Usage: OfflineAnonymizer <log_type> <filename>");
+      System.out
+          .println("where <log_type> is either \"-hadoop\" or \"-system\" and <filename> is the path to the log file");
+      System.exit(0);
+    }
+
+    LogType logtype = null;
+
+    if (args[0].equalsIgnoreCase("-hadoop"))
+      logtype = LogType.HADOOP;
+    else if (args[0].equalsIgnoreCase("-system"))
+      logtype = LogType.SYSTEM;
+    else {
+      System.err.println("Invalid first argument.");
+      System.exit(0);
+    }
+
+    OfflineAnonymizer oa = new OfflineAnonymizer(logtype, args[1]);
+
+    try {
+      oa.anonymize();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return;
+  }
+}
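
For reference, a typical invocation (classpath and log path are illustrative) is "java org.apache.hadoop.contrib.failmon.OfflineAnonymizer -system /var/log/messages", which writes messages.anonymized in the working directory and then a compressed copy of it carrying the COMPRESSION_SUFFIX extension defined in LocalStore.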

+ 163 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/PersistentState.java

@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.Properties;
+import java.util.Calendar;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+/**********************************************************
+ * This class takes care of the information that needs to be
+ * persistently stored locally on nodes. Bookkeeping is done for the
+ * state of parsing of log files, so that the portion of the file that
+ * has already been parsed in previous calls will not be parsed again.
+ * For each log file, we maintain the byte offset of the last
+ * character parsed in previous passes. Also, the first entry in the
+ * log file is stored, so that FailMon can determine when a log file
+ * has been rotated (and thus parsing needs to start from the
+ * beginning of the file). We use a property file to store that
+ * information. For each log file we create a property keyed by the
+ * filename, the value of which contains the byte offset and first log
+ * entry separated by a SEPARATOR.
+ * 
+ **********************************************************/
+
+public class PersistentState {
+
+  private final static String SEPARATOR = "###";
+  
+  static String filename;
+  static Properties persData = new Properties();
+  
+  /**
+   * Read the state of parsing for all open log files from a property
+   * file.
+   * 
+   * @param fname the filename of the property file to be read
+   */
+
+  public static void readState(String fname) {
+
+    filename = fname;
+    
+    try {
+      persData.load(new FileInputStream(filename));
+    } catch (FileNotFoundException e1) {
+      // ignore
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+   /**
+   * Read and return the state of parsing for a particular log file.
+   * 
+   * @param fname the log file for which to read the state
+   */
+  public static ParseState getState(String fname) {
+    String [] fields = persData.getProperty(fname, "null" + SEPARATOR + "0").split(SEPARATOR, 2);
+    String firstLine;
+    long offset;
+    
+    if (fields.length < 2) {
+      System.err.println("Malformed persistent state data found");
+      Environment.logInfo("Malformed persistent state data found");
+      firstLine = null;
+      offset = 0;
+    } else {
+      firstLine = (fields[0].equals("null") ? null : fields[0]);
+      offset = Long.parseLong(fields[1]);
+    }
+
+    return new ParseState(fname, firstLine, offset);
+  }
+
+  /**
+   * Set the state of parsing for a particular log file.
+   * 
+   * @param state the ParseState to set
+   */
+  public static void setState(ParseState state) {
+
+    if (state == null) {
+      System.err.println("Null state found");
+      Environment.logInfo("Null state found");
+      return;
+    }
+
+    persData.setProperty(state.filename, state.firstLine + SEPARATOR + state.offset);
+  }
+
+  /**
+   * Update the state of parsing for a particular log file.
+   * 
+   * @param filename the log file for which to update the state
+   * @param firstLine the current first line of the log file
+   * @param offset the byte offset of the last character parsed
+   */ 
+  public static void updateState(String filename, String firstLine, long offset) {
+
+    ParseState ps = getState(filename);
+
+    if (firstLine != null)
+      ps.firstLine = firstLine;
+
+    ps.offset = offset;
+
+    setState(ps);
+  }
+
+  /**
+   * Write the state of parsing for all open log files to a property
+   * file on disk.
+   * 
+   * @param fname the filename of the property file to write to
+   */
+  public static void writeState(String fname) {
+    try {
+      persData.store(new FileOutputStream(fname), Calendar.getInstance().getTime().toString());
+    } catch (FileNotFoundException e1) {
+      e1.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+  
+}
+
+/**********************************************************
+ * This class represents the state of parsing for a particular log
+ * file.
+ * 
+ **********************************************************/
+
+class ParseState {
+
+  public String filename;
+  public String firstLine;
+  public long offset;
+
+  public ParseState(String _filename, String _firstLine, long _offset) {
+    this.filename = _filename;
+    this.firstLine = _firstLine;
+    this.offset = _offset;
+  }
+}
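
A short sketch of how the parsing state round-trips through PersistentState; the log path, sample first line, and offset are illustrative, and "conf/parsing.state" mirrors the path used by LogParser.monitor() in this patch:

    package org.apache.hadoop.contrib.failmon;

    public class ParsingStateExample {
      public static void main(String[] args) {
        PersistentState.readState("conf/parsing.state");

        // Where did parsing of /var/log/messages stop last time?
        // (firstLine == null and offset == 0 if the file was never parsed)
        ParseState ps = PersistentState.getState("/var/log/messages");
        System.out.println("resume at byte offset " + ps.offset);

        // Record that parsing reached byte 2048, remembering the first log line
        // so that a later run can detect rotation, then flush to disk.
        PersistentState.updateState("/var/log/messages",
            "Jan  1 00:00:01 node01 syslogd 1.4.1: restart.", 2048L);
        PersistentState.writeState("conf/parsing.state");
      }
    }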

+ 120 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/RunOnce.java

@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.ArrayList;
+
+/**********************************************************
+* Runs a set of monitoring jobs once for the local node. The set of
+* jobs to be run is the intersection of the jobs specifed in the
+* configuration file and the set of jobs specified in the --only
+* command line argument.
+ **********************************************************/ 
+
+public class RunOnce {
+
+  LocalStore lstore;
+
+  ArrayList<MonitorJob> monitors;
+  
+  boolean uploading = true;
+  
+  public RunOnce(String confFile) {
+    
+    Environment.prepare(confFile);
+    
+    String localTmpDir;
+    
+    // running as a stand-alone application
+    localTmpDir = System.getProperty("java.io.tmpdir");
+    Environment.setProperty("local.tmp.dir", localTmpDir);
+        
+    monitors = Environment.getJobs();
+    lstore = new LocalStore();
+    uploading  = true;
+  }
+
+  private void filter (String [] ftypes) {
+    ArrayList<MonitorJob> filtered = new ArrayList<MonitorJob>();
+    boolean found;
+    
+    // filter out unwanted monitor jobs
+    for (MonitorJob job : monitors) {
+      found = false;
+      for (String ftype : ftypes)
+        if (job.type.equalsIgnoreCase(ftype))
+          found = true;
+      if (found)
+        filtered.add(job);
+    }
+
+    // disable uploading if not requested
+    found = false;
+    for (String ftype : ftypes)
+      if (ftype.equalsIgnoreCase("upload"))
+        found = true;
+
+    if (!found)
+      uploading = false;
+    
+    monitors = filtered;
+  }
+  
+  private void run() {
+    
+    Environment.logInfo("Failmon started successfully.");
+
+    for (int i = 0; i < monitors.size(); i++) {
+      Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
+      monitors.get(i).job.monitor(lstore);
+    }
+
+    if (uploading)
+      lstore.upload();
+
+    lstore.close();
+  }
+
+  public void cleanup() {
+    // nothing to be done
+  }
+
+  
+  public static void main (String [] args) {
+
+    String configFilePath = "./conf/failmon.properties";
+    String [] onlyList = null;
+    
+    // Parse command-line parameters
+    for (int i = 0; i < args.length - 1; i++) {
+      if (args[i].equalsIgnoreCase("--config"))
+        configFilePath = args[i + 1];
+      else if (args[i].equalsIgnoreCase("--only"))
+        onlyList = args[i + 1].split(",");
+    }
+
+    RunOnce ro = new RunOnce(configFilePath);
+    // only keep the requested types of jobs
+    if (onlyList != null)
+      ro.filter(onlyList);
+    // run once only
+    ro.run();
+  }
+
+}
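
For reference, a typical stand-alone invocation (classpath and job type names are illustrative) is "java org.apache.hadoop.contrib.failmon.RunOnce --config ./conf/failmon.properties --only sensors,upload": only monitor jobs whose configured type matches an entry of --only are run, and the local store is uploaded to HDFS only if "upload" appears in that list. Without --only, all configured jobs run and the upload is performed.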

+ 206 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SMARTParser.java

@@ -0,0 +1,206 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of smartmontools to 
+ * gather information about the state of disks in the system. The
+ * smartmontools utility reads the S.M.A.R.T. attributes from
+ * the disk devices and reports them to the user. Note that since
+ * running smartctl requires superuser privileges, one should
+ * grant sudo privileges to the running user for the command smartctl
+ * (without a password). Alternatively, one can set up a cron job that 
+ * periodically dumps the output of smartctl into a user-readable file.
+ * See the configuration file for details.
+ *
+ **********************************************************/
+
+public class SMARTParser extends ShellParser {
+
+  String[] devices;
+
+  /**
+   * Constructs a SMARTParser and reads the list of disk 
+   * devices to query
+   */
+  public SMARTParser() {
+    super();
+    String devicesStr = Environment.getProperty("disks.list");
+    if (devicesStr != null)
+      devices = devicesStr.split(",\\s*");
+  }
+
+  /**
+   * Reads and parses the output of smartctl for a specified disk and 
+   * creates an appropriate EventRecord that holds the desirable 
+   * information for it. Since the output of smartctl is different for 
+   * different kinds of disks, we try to identify as many attributes as 
+ * possible for all known output formats. 
+   * 
+   * @param device the disk device name to query
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String device) throws Exception {
+    String conf = Environment.getProperty("disks." + device + ".source");
+    CharSequence sb;
+
+    if (conf == null)
+      sb = Environment.runCommandGeneric("sudo smartctl --all " + device);
+    else
+      sb = Environment.runCommandGeneric("cat " + conf);
+
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "SMART", "Unknown",
+        (conf == null ? "sudo smartctl --all " + device : "file " + conf), "-");
+    // IBM SCSI disks
+    retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("current_temperature", findPattern(
+        "Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("trip_temperature", findPattern(
+        "Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("start_stop_count", findPattern(
+        "start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1));
+
+    String[] var = { "read", "write", "verify" };
+    for (String s : var) {
+      retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb
+          .toString(), 1));
+      retval.set(s + "_ecc_delayed", findPattern(s
+          + "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2));
+      retval.set(s + "_rereads", findPattern(
+          s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2));
+      retval.set(s + "_GBs", findPattern(s
+          + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2));
+      retval.set(s + "_uncorrected",
+          findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb
+              .toString(), 3));
+    }
+
+    // Hitachi IDE, SATA
+    retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("status", "PASSED".equalsIgnoreCase(findPattern(
+        "test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? "OK" : "FAILED");
+
+    readColumns(retval, sb);
+
+    return retval;
+  }
+
+  /**
+   * Reads attributes in the following format:
+   * 
+   * ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
+   * 3 Spin_Up_Time             0x0027   180   177   063    Pre-fail  Always       -       10265
+   * 4 Start_Stop_Count         0x0032   253   253   000    Old_age   Always       -       34
+   * 5 Reallocated_Sector_Ct    0x0033   253   253   063    Pre-fail  Always       -       0
+   * 6 Read_Channel_Margin      0x0001   253   253   100    Pre-fail  Offline      -       0
+   * 7 Seek_Error_Rate          0x000a   253   252   000    Old_age   Always       -       0
+   * 8 Seek_Time_Performance    0x0027   250   224   187    Pre-fail  Always       -       53894
+   * 9 Power_On_Minutes         0x0032   210   210   000    Old_age   Always       -       878h+00m
+   * 10 Spin_Retry_Count        0x002b   253   252   157    Pre-fail  Always       -       0
+   * 11 Calibration_Retry_Count 0x002b   253   252   223    Pre-fail  Always       -       0
+   * 12 Power_Cycle_Count       0x0032   253   253   000    Old_age   Always       -       49
+   * 192 PowerOff_Retract_Count 0x0032   253   253   000    Old_age   Always       -       0
+   * 193 Load_Cycle_Count       0x0032   253   253   000    Old_age   Always       -       0
+   * 194 Temperature_Celsius    0x0032   037   253   000    Old_age   Always       -       37
+   * 195 Hardware_ECC_Recovered 0x000a   253   252   000    Old_age   Always       -       2645
+   * 
+   * This format is mostly found in IDE and SATA disks.
+   * 
+   * @param er the EventRecord in which to store attributes found
+   * @param sb the text to parse
+   * 
+   * @return the EventRecord in which new attributes are stored.
+   */
+  private EventRecord readColumns(EventRecord er, CharSequence sb) {
+
+    Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$",
+        Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(sb);
+
+    while (matcher.find()) {
+      String[] tokens = matcher.group(1).split("\\s+");
+      boolean failed = false;
+      // check if this attribute is a failed one
+      if (!tokens[8].equals("-"))
+        failed = true;
+      er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]);
+    }
+
+    return er;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors for 
+   * each one of the disks specified in the configuration. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of the disk devices.
+   */
+  public EventRecord[] monitor() {
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+
+    for (String device : devices) {
+      try {
+        recs.add(query(device));
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
+    }
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    String retval = "S.M.A.R.T. disk attributes parser for disks ";
+    for (String device : devices)
+      retval += device + " ";
+    return retval;
+  }
+
+}

+ 112 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SensorsParser.java

@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of the lm-sensors utility 
+ * to gather information about fan speed, temperatures for cpus
+ * and motherboard etc.
+ *
+ **********************************************************/
+
+public class SensorsParser extends ShellParser {
+
+  /**
+   * Reads and parses the output of the 'sensors' command 
+   * and creates an appropriate EventRecord that holds 
+   * the desirable information.
+   * 
+   * @param s unused parameter
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String s) throws Exception {
+    CharSequence sb;
+
+    sb = Environment.runCommandGeneric("sensors -A");
+
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "lm-sensors", "Unknown",
+        "sensors -A", "-");
+    readGroup(retval, sb, "fan");
+    readGroup(retval, sb, "in");
+    readGroup(retval, sb, "temp");
+    readGroup(retval, sb, "Core");
+
+    return retval;
+  }
+
+  /**
+   * Reads and parses lines that provide the output
+   * of a group of sensors with the same functionality.
+   * 
+   * @param er the EventRecord to which the new attributes are added
+   * @param sb the text to parse
+   * @param prefix a String prefix specifying the common prefix of the
+   * sensors' names in the group (e.g. "fan", "in", "temp")
+   * 
+   * @return the EventRecord created
+   */
+  private EventRecord readGroup(EventRecord er, CharSequence sb, String prefix) {
+
+    Pattern pattern = Pattern.compile(".*(" + prefix
+        + "\\s*\\d*)\\s*:\\s*(\\+?\\d+)", Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(sb);
+
+    while (matcher.find())
+      er.set(matcher.group(1), matcher.group(2));
+
+    return er;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of the hardware sensors
+   */
+  public EventRecord[] monitor() {
+    EventRecord[] recs = new EventRecord[1];
+
+    try {
+      recs[0] = query(null);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return recs;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("lm-sensors parser");
+  }
+
+}

+ 163 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SerializedRecord.java

@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.text.DateFormat;
+
+/**********************************************************
+ * Objects of this class hold the serialized representations
+ * of EventRecords. A SerializedRecord is essentially an EventRecord
+ * with all its property values converted to strings. It also provides 
+ * some convenience methods for printing the property fields in a 
+ * more readable way.
+ *
+ **********************************************************/
+
+public class SerializedRecord {
+
+  HashMap<String, String> fields;
+  private static DateFormat dateFormatter =
+    DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.LONG);
+
+  /**
+   * Create the SerializedRecord given an EventRecord.
+   */
+  
+  public SerializedRecord(EventRecord source) {
+    fields = new HashMap<String, String>();
+    fields.clear();
+
+    for (String k : source.getMap().keySet()) {
+      ArrayList<String> strs = getStrings(source.getMap().get(k));
+      if (strs.size() == 1)
+        fields.put(k, strs.get(0));
+      else
+        for (int i = 0; i < strs.size(); i++)
+          fields.put(k + "#" + i, strs.get(i));
+    }
+
+  }
+
+  /**
+   * Extract String representations from an Object.
+   * 
+   * @param o the input object
+   * 
+   * @return an ArrayList that contains Strings found in o
+   */
+  private ArrayList<String> getStrings(Object o) {
+    ArrayList<String> retval = new ArrayList<String>();
+    retval.clear();
+    if (o == null)
+      retval.add("null");
+    else if (o instanceof String)
+      retval.add((String) o);
+    else if (o instanceof Calendar)
+      retval.add(dateFormatter.format(((Calendar) o).getTime()));
+    else if (o instanceof InetAddress[])
+      for (InetAddress ip : ((InetAddress[]) o))
+        retval.add(ip.getHostAddress());
+    else if (o instanceof String[])
+      for (String s : (String []) o)
+        retval.add(s);
+    else
+      retval.add(o.toString());
+
+    return retval;
+  }
+
+  /**
+   * Set the value of a property of the EventRecord.
+   * 
+   * @param fieldName the name of the property to set
+   * @param fieldValue the value of the property to set
+   * 
+   */
+  public void set(String fieldName, String fieldValue) {
+    fields.put(fieldName, fieldValue);
+  }
+
+  /**
+   * Get the value of a property of the EventRecord.
+   * If the property with the specific key is not found,
+   * null is returned.
+   * 
+   * @param fieldName the name of the property to get.
+   */
+  public String get(String fieldName) {
+    return fields.get(fieldName);
+  }
+
+  /**
+   * Arrange the keys to provide a more readable printing order:
+   * first goes the timestamp, then the hostname and then the type, followed
+   * by all other keys found.
+   * 
+   * @param keys The input ArrayList of keys to re-arrange.
+   */
+  public static void arrangeKeys(ArrayList<String> keys) {
+    move(keys, "timestamp", 0);
+    move(keys, "hostname", 1);
+    move(keys, "type", 2);
+  }
+
+  private static void move(ArrayList<String> keys, String key, int position) {
+    int cur = keys.indexOf(key);
+    if (cur == -1)
+      return;
+    keys.set(cur, keys.get(position));
+    keys.set(position, key);
+  }
+
+  /**
+   * Check if the SerializedRecord is a valid one, i.e., whether
+   * it represents meaningful metric values.
+   * 
+   * @return true if the EventRecord is a valid one, false otherwise.
+   */
+  public boolean isValid() {
+    return !("invalid".equalsIgnoreCase(fields.get("hostname")));
+  }
+
+  
+  /**
+   * Creates and returns a string representation of the object
+   * 
+   * @return a String representing the object
+   */
+
+  public String toString() {
+    String retval = "";
+    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
+    arrangeKeys(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      String value = fields.get(keys.get(i));
+      if (value == null)
+        retval += keys.get(i) + ":\tnull\n";
+      else
+        retval += keys.get(i) + ":\t" + value + "\n";
+    }
+    return retval;
+  }
+}
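
A small sketch tying SerializedRecord to LocalStore's packing: it converts an EventRecord to its string form, then prints both the readable representation and the compact key:value form. The class name and record contents are illustrative:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class SerializedRecordExample {
      public static void main(String[] args) throws Exception {
        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "manual", "disk temperature high");

        SerializedRecord sr = new SerializedRecord(er);

        // toString() prints one field per line, with timestamp, hostname and type first.
        System.out.println(sr);

        // packConcurrent() yields the compact key:value form appended by LocalStore.
        System.out.println(LocalStore.packConcurrent(sr));
      }
    }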

+ 102 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/ShellParser.java

@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of system command-line
+ * utilities that can give information about the state of  
+ * various hardware components in the system. Typically, each such
+ * object either invokes a command and reads its output or reads the 
+ * output of one such command from a file on the disk. Currently 
+ * supported utilities include ifconfig, smartmontools, lm-sensors,
+ * /proc/cpuinfo.
+ *
+ **********************************************************/
+
+public abstract class ShellParser implements Monitored {
+
+  /**
+   * Find the first occurrence of a pattern in a piece of text 
+   * and return a specific group.
+   * 
+   *  @param strPattern the regular expression to match
+   *  @param text the text to search
+   *  @param grp the number of the matching group to return
+   *  
+   *  @return a String containing the matched group of the regular expression
+   */
+  protected String findPattern(String strPattern, String text, int grp) {
+
+    Pattern pattern = Pattern.compile(strPattern, Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(text);
+
+    if (matcher.find(0))
+      return matcher.group(grp);
+
+    return null;
+  }
+
+  /**
+   * Finds all occurrences of a pattern in a piece of text and returns 
+   * the matching groups.
+   * 
+   *  @param strPattern the regular expression to match
+   *  @param text the text to search
+   *  @param grp the number of the matching group to return
+   *  @param separator the string that separates occurrences in the returned value
+   *  
+   *  @return a String that contains all occurrences of strPattern in text, 
+   *  separated by separator
+   */
+  protected String findAll(String strPattern, String text, int grp,
+      String separator) {
+
+    String retval = "";
+    boolean firstTime = true;
+
+    Pattern pattern = Pattern.compile(strPattern);
+    Matcher matcher = pattern.matcher(text);
+
+    while (matcher.find()) {
+      retval += (firstTime ? "" : separator) + matcher.group(grp);
+      firstTime = false;
+    }
+
+    return retval;
+  }
+
+  /**
+   * Insert all EventRecords that can be extracted for
+   * the represented hardware component into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls) {
+    ls.insert(monitor());
+  }
+
+  abstract public EventRecord[] monitor();
+
+  abstract public EventRecord query(String s) throws Exception;
+
+}
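
A throwaway ShellParser subclass (illustrative only) showing what the findPattern() and findAll() helpers return when run against ifconfig-style sample text:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class ShellParserExample extends ShellParser {

      public EventRecord query(String s) throws Exception {
        String sample = "eth0  inet addr:192.168.1.10\n"
            + "      RX packets:12345 errors:0\n";

        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "none", "-");

        // First match of group 1: "192.168.1.10"
        er.set("ipAddress", findPattern("inet\\s+addr:\\s*([\\w.?]*)", sample, 1));
        // All matches of group 1 joined with ",": here just "12345"
        er.set("rxPackets", findAll("RX\\s*packets\\s*:\\s*(\\d+)", sample, 1, ","));
        return er;
      }

      public EventRecord[] monitor() {
        try {
          return new EventRecord[] { query(null) };
        } catch (Exception e) {
          e.printStackTrace();
          return new EventRecord[0];
        }
      }

      public String getInfo() {
        return "example shell parser";
      }
    }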

+ 126 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SystemLogParser.java

@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * An object of this class parses a Unix system log file to create
+ * appropriate EventRecords. Currently, only the syslogd logging 
+ * daemon is supported.
+ * 
+ **********************************************************/
+
+public class SystemLogParser extends LogParser {
+
+  static String[] months = { "January", "February", "March", "April", "May",
+      "June", "July", "August", "September", "October", "November", "December" };
+  /**
+   * Create a new parser object .
+   */  
+  public SystemLogParser(String fname) {
+    super(fname);
+    if ((dateformat = Environment.getProperty("log.system.dateformat")) == null)
+      dateformat = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
+    if ((timeformat = Environment.getProperty("log.system.timeformat")) == null)
+      timeformat = "\\d{2}:\\d{2}:\\d{2}";
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *    
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  public EventRecord parseLine(String line) throws IOException {
+
+    EventRecord retval = null;
+
+    if (line != null) {
+      // process line
+      String patternStr = "(" + dateformat + ")";
+      patternStr += "\\s+";
+      patternStr += "(" + timeformat + ")";
+      patternStr += "\\s+(\\S*)\\s"; // for hostname
+//      patternStr += "\\s*([\\w+\\.?]+)"; // for source
+      patternStr += ":?\\s*(.+)"; // for the message
+      Pattern pattern = Pattern.compile(patternStr);
+      Matcher matcher = pattern.matcher(line);
+      if (matcher.find() && matcher.groupCount() >= 0) {
+        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
+            matcher.group(4)), "SystemLog", "Unknown", // loglevel
+            "Unknown", // source
+            matcher.group(6)); // message
+      } else {
+        retval = new EventRecord();
+      }
+    }
+
+    return retval;
+  }
+
+  /**
+   * Parse a date found in the system log.
+   * 
+   * @return a Calendar representing the date
+   */
+  protected Calendar parseDate(String strDate, String strTime) {
+    Calendar retval = Calendar.getInstance();
+    // set date
+    String[] fields = strDate.split("\\s+");
+    retval.set(Calendar.MONTH, parseMonth(fields[0]));
+    retval.set(Calendar.DATE, Integer.parseInt(fields[1]));
+    // set time
+    fields = strTime.split(":");
+    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
+    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
+    return retval;
+  }
+
+  /**
+   * Convert the name of a month to the corresponding int value.
+   * 
+   * @return the int representation of the month.
+   */
+  private int parseMonth(String month) {
+    for (int i = 0; i < months.length; i++)
+      if (months[i].startsWith(month))
+        return i;
+    return -1;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("System Log Parser for file : " + file.getAbsoluteFile());
+  }
+}
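
A short sketch of driving SystemLogParser end to end; the paths are illustrative, and Environment.prepare() must point at a valid failmon.properties for the parser and LocalStore to pick up their settings:

    package org.apache.hadoop.contrib.failmon;

    public class SyslogParseExample {
      public static void main(String[] args) {
        Environment.prepare("conf/failmon.properties");
        PersistentState.readState("conf/parsing.state");

        LogParser parser = new SystemLogParser("/var/log/messages");
        LocalStore store = new LocalStore();

        // Resumes from the last recorded offset, creates one EventRecord per
        // valid syslog entry (e.g. "Jan  1 00:00:01 node01 kernel: ...") and
        // stores it, then updates conf/parsing.state.
        parser.monitor(store);
        store.close();
      }
    }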

+ 272 - 0
common/src/contrib/hod/CHANGES.txt

@@ -0,0 +1,272 @@
+HOD Change Log
+
+Trunk (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-5022. Provide an option to remove all log files older
+    than the configured time via logcondense.
+    (Peeyush Bishnoi via yhemanth)
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-2898. Provide an option to specify a port range for
+    Hadoop services provisioned by HOD.
+    (Peeyush Bishnoi via yhemanth)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-5113. Fixed logcondense to remove files for usernames
+    beginning with characters specified in the -l option.
+    (Peeyush Bishnoi via yhemanth)
+
+Release 0.20.0 - (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-4705. Grant read permissions for files/directories
+    created by HOD. (Peeyush Bishnoi via yhemanth)
+
+    HADOOP-4937. Include ringmaster RPC port in the notes
+    attribute. (Peeyush Bishnoi via yhemanth)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-4782. Revert umask changes in HADOOP-4705 so that
+    files are still securely created. (Peeyush Bishnoi via
+    yhemanth)
+
+Release 0.19.0 - 2008-11-18
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+    HADOOP-3695. Provide an ability to start multiple workers per node.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-3959. Pass options specified in resource_manager.options to
+    job submission. 
+    (Craig Macdonald and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3814. Remove generation of dfs.client.buffer.dir for the generated
+    hadoop-site.xml. (Vinod Kumar Vavilapalli via acmurthy)
+
+Release 0.18.2 - Unreleased 
+
+  BUG FIXES
+
+    HADOOP-3786. Use HDFS instead of DFS in all docs and hyperlink to Torque.
+    (Vinod Kumar Vavilapalli via acmurthy)
+
+Release 0.18.1 - 2008-09-17
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4060. Modified HOD to rotate log files on the client side.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+  IMPROVEMENTS
+
+    HADOOP-4145. Add an accounting plugin (script) for HOD.
+    (Hemanth Yamijala via nigel)
+
+  BUG FIXES
+
+    HADOOP-4161. Fixed bug in HOD cleanup that had the potential to
+    hang clients. (Vinod Kumar Vavilapalli via nigel)
+
+Release 0.18.0 - 2008-08-19
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3483. Modified HOD to create a cluster directory if one does not
+    exist and to auto-deallocate a cluster while reallocating it, if it is
+    already dead. (Hemanth Yamijala via mukund)
+
+    HADOOP-3184. Modified HOD to handle master failures on bad nodes by trying 
+    to bring them up on another node in the ring. (Hemanth Yamijala via ddas)
+
+    HADOOP-3610. Modified HOD to create cluster directory if one does not
+    exist when using the script option. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3808. Modified HOD to include RPC port of the JobTracker
+    into the notes attribute of the resource manager. (yhemanth)
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-3376: Provide a mechanism to detect and handle violations to 
+    resource manager limits. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3151. Improves error messages when reporting failures due to 
+    incorrect parameters passed to HOD. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on
+    compute nodes to the submit node running the HOD client, so users have good
+    feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund)
+
+    HADOOP-3505. Updated HOD documentation with changes made for Hadoop
+    0.18. (Vinod Kumar Vavilapalli via yhemanth)
+ 
+  BUG FIXES
+
+    HADOOP-2961. Avoids unnecessary checks for some configuration parameters
+    related to service configuration. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3523. Fixes auto-deallocation of cluster if job id is not found in
+    Torque's job list (Hemanth Yamijala via ddas)
+
+    HADOOP-3531. Fixes a bug related to handling JobTracker failures because of
+    timing issues on slow nodes. (Hemanth Yamijala via ddas)
+
+    HADOOP-3564. HOD generates values for the parameter dfs.datanode.ipc.address
+    in the hadoop-site.xml created on datanodes. 
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3076. Fixes a bug related to a spurious message about the 
+    script.exitcode file when a cluster directory is specified as a relative
+    path. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3668. Makes editorial changes to HOD documentation.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3703. Fixes logcondense.py to use the new format of hadoop dfs -lsr
+    command line output format. (Vinod Kumar Vavilapalli via yhemanth)
+
+Release 0.17.3 - Unreleased 
+
+  BUG FIXES
+
+    HADOOP-3217. Decrease the rate at which the hod queries the resource
+    manager for job status. (Hemanth Yamijala via acmurthy) 
+
+Release 0.17.0 - 2008-05-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3137. Modified build script to pick up version automatically
+    from Hadoop build. (yhemanth)
+
+  IMPROVEMENTS
+
+    HADOOP-2775.  Adds unit test framework for HOD.
+    (Vinod Kumar Vavilapalli via ddas).
+
+    HADOOP-2848. [HOD]hod -o list and deallocate works even after deleting
+    the cluster directory. (Hemanth Yamijala via ddas)
+
+    HADOOP-2899. [HOD] Cleans up hdfs:///mapredsystem directory after
+    deallocation. (Hemanth Yamijala via ddas)
+
+    HADOOP-2796. Enables distinguishing exit codes from user code vis-a-vis
+    HOD's exit code. (Hemanth Yamijala via ddas)
+
+    HADOOP-2947. HOD redirects stdout and stderr of daemons to assist
+    getting stack traces. (Vinod Kumar Vavilapalli via yhemanth)
+
+  BUG FIXES
+
+    HADOOP-2924. Fixes an address problem to do with TaskTracker binding
+    to an address. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2970. Fixes a problem to do with Wrong class definition for
+    hodlib/Hod/hod.py for Python < 2.5.1.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2783. Fixes a problem to do with import in
+    hod/hodlib/Common/xmlrpc.py. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2936. Fixes HOD in a way that it generates hdfs://host:port on the
+    client side configs. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2983. [HOD] Fixes the problem - local_fqdn() returns None when
+    gethostbyname_ex doesn't return any FQDNs. (Craig Macdonald via ddas)
+
+    HADOOP-2982. Fixes a problem in the way HOD looks for free nodes.
+    (Hemanth Yamijala via ddas)
+
+    HADOOP-2855. Fixes the way HOD handles relative paths for cluster
+    directory, script file and other options.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3153. Fixes the way HOD handles allocation if the user has no
+    permissions to update the clusters state file.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+Release 0.16.4 - 2008-05-05
+
+  BUG FIXES
+
+    HADOOP-3304. [HOD] Fixes the way the logcondense.py utility searches
+    for log files that need to be deleted. (yhemanth via mukund)
+
+Release 0.16.2 - 2008-04-02
+
+  BUG FIXES
+
+    HADOOP-3103. [HOD] Hadoop.tmp.dir should not be set to cluster
+    directory. (Vinod Kumar Vavilapalli via ddas).
+
+Release 0.16.1 - 2008-03-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2861. Improve the user interface for the HOD commands.
+    Command line structure has changed. (Hemanth Yamijala via nigel)
+
+  IMPROVEMENTS
+
+    HADOOP-2730. HOD documentation update.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2911. Make the information printed by the HOD allocate and
+    info commands less verbose and clearer. (Vinod Kumar via nigel)
+
+  BUG FIXES
+
+    HADOOP-2766. Enables setting of HADOOP_OPTS env variable for the hadoop
+    daemons through HOD. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2809.  Fix HOD syslog config syslog-address so that it works.
+    (Hemanth Yamijala via nigel)
+
+    HADOOP-2847.  Ensure idle cluster cleanup works even if the JobTracker
+    becomes unresponsive to RPC calls. (Hemanth Yamijala via nigel)
+
+    HADOOP-2925. Fix HOD to create the mapred system directory using a
+    naming convention that will avoid clashes in multi-user shared
+    cluster scenario. (Hemanth Yamijala via nigel)
+
+Release 0.16.0 - 2008-02-07
+
+  NEW FEATURES
+
+    HADOOP-1301.  Hadoop-On-Demand (HOD): resource management
+    provisioning for Hadoop. (Hemanth Yamijala via nigel)
+
+  BUG FIXES
+
+    HADOOP-2720. Jumbo bug fix patch to HOD.  Final sync of Apache SVN with
+    internal Yahoo SVN.  (Hemanth Yamijala via nigel)
+
+    HADOOP-2740. Fix HOD to work with the configuration variables changed in
+    HADOOP-2404. (Hemanth Yamijala via omalley)
+

+ 104 - 0
common/src/contrib/hod/README

@@ -0,0 +1,104 @@
+                        Hadoop On Demand
+                        ================
+
+1. Introduction:
+================
+
+The Hadoop On Demand (HOD) project is a system for provisioning and 
+managing independent Hadoop MapReduce instances on a shared cluster 
+of nodes. HOD uses a resource manager for allocation. At present it
+supports Torque (http://www.clusterresources.com/pages/products/torque-resource-manager.php)
+out of the box. 
+
+2. Feature List:
+================
+
+The following are the features provided by HOD:
+
+2.1 Simplified interface for managing MapReduce clusters:
+
+The MapReduce user interacts with the cluster through a simple 
+command line interface, the HOD client. HOD brings up a virtual 
+MapReduce cluster with the required number of nodes, which the 
+user can use for running Hadoop jobs. When done, HOD will 
+automatically clean up the resources and make the nodes available 
+again.
+
+2.2 Automatic installation of Hadoop:
+
+With HOD, Hadoop does not even need to be installed on the cluster.
+The user can provide a Hadoop tarball that HOD will automatically 
+distribute to all the nodes in the cluster.
+
+2.3 Configuring Hadoop:
+
+Dynamic parameters of Hadoop configuration, such as the NameNode and 
+JobTracker addresses and ports, and file system temporary directories
+are generated and distributed by HOD automatically to all nodes in
+the cluster.
+
+In addition, HOD allows the user to configure Hadoop parameters
+at both the server (e.g. the JobTracker) and client (e.g. the JobClient)
+levels, including the 'final' parameters that were introduced with 
+Hadoop 0.15.
+
+2.4 Auto-cleanup of unused clusters:
+
+HOD has an automatic timeout so that users cannot hold on to resources 
+they aren't using. The timeout applies only when there is no MapReduce 
+job running. 
+
+2.5 Log services:
+
+HOD can be used to collect all MapReduce logs to a central location
+for archiving and inspection after the job is completed.
+
+3. HOD Components
+=================
+
+This is a brief overview of the various components of HOD and how they
+interact to provision Hadoop.
+
+HOD Client: The HOD client is a Unix command that users use to allocate 
+Hadoop MapReduce clusters. The command provides other options to list 
+allocated clusters and deallocate them. The HOD client generates the 
+hadoop-site.xml in a user specified directory. The user can point to 
+this configuration file while running Map/Reduce jobs on the allocated 
+cluster.
+
+RingMaster: The RingMaster is a HOD process that is started on one node 
+per allocated cluster. It is submitted as a 'job' to the resource 
+manager by the HOD client. It controls which Hadoop daemons start on 
+which nodes. It provides this information to other HOD processes, 
+such as the HOD client, so users can also determine this information. 
+The RingMaster is responsible for hosting and distributing the 
+Hadoop tarball to all nodes in the cluster. It also automatically 
+cleans up unused clusters.
+
+HodRing: The HodRing is a HOD process that runs on every allocated node
+in the cluster. These processes are run by the RingMaster through the 
+resource manager, using its parallel execution facility. The HodRings
+are responsible for launching Hadoop commands on the nodes to bring up 
+the Hadoop daemons. They get the command to launch from the RingMaster.
+
+Hodrc / HOD configuration file: An INI style configuration file where
+the users configure various options for the HOD system, including
+install locations of different software, resource manager parameters,
+log and temp file directories, parameters for their MapReduce jobs,
+etc.
+
+Submit Nodes: Nodes where the HOD Client is run, from where jobs are
+submitted to the resource manager system for allocating and running 
+clusters.
+
+Compute Nodes: Nodes which get allocated by a resource manager, 
+and on which the Hadoop daemons are provisioned and started.
+
+4. Next Steps:
+==============
+
+- Read getting_started.txt to get an idea of how to get started with
+installing, configuring and running HOD.
+
+- Read config.txt to get more details on configuration options for HOD.
+

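The hodrc file described above is an INI-style file whose section names (hod, ringmaster, hodring, resource_manager, gridservice-mapred, gridservice-hdfs) and option names are defined in bin/hod below. The following is a minimal sketch and not a recommended configuration: the option names are taken from that definition list, every value is made up, and the standard-library ConfigParser stands in for HOD's own parser in hodlib.Common.setup.

# Sketch only: reads a hypothetical hodrc with the section names described in
# the README. All values below are illustrative.
try:
    from configparser import ConfigParser                       # Python 3
except ImportError:
    from ConfigParser import SafeConfigParser as ConfigParser   # Python 2

SAMPLE_HODRC = """
[hod]
java-home  = /usr/lib/jvm/java
clusterdir = /home/alice/hod-clusters/test

[resource_manager]
id         = torque
batch-home = /usr/local/torque
queue      = batch

[gridservice-hdfs]
external   = False

[gridservice-mapred]
external   = False
"""

def load_hodrc(path):
    parser = ConfigParser()
    parser.read(path)
    return parser

if __name__ == "__main__":
    with open("/tmp/hodrc.sample", "w") as handle:
        handle.write(SAMPLE_HODRC)
    conf = load_hodrc("/tmp/hodrc.sample")
    print(conf.get("resource_manager", "id"))   # -> torque
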
+ 1 - 0
common/src/contrib/hod/bin/VERSION

@@ -0,0 +1 @@
+0.4.0

+ 31 - 0
common/src/contrib/hod/bin/checknodes

@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+PBS_NODES_PATH=`which pbsnodes 2>/dev/null`
+if [ -z "$PBS_NODES_PATH" ]
+then
+  echo "Could not find pbsnodes in path. Cannot check available number of nodes." >&2
+  exit 1
+fi
+if [ -z "$1" ]
+then
+  echo "Usage: checknodes queue-name" >&2
+  exit 2
+fi
+# Count the nodes marked 'free' that do not have a jobs attribute from the server or from the moms.
+$PBS_NODES_PATH :$1 | awk 'BEGIN {c=0} /state = free/ {getline;getline;getline;getline; if ($0 !~ /jobs =/ && $0 !~ /jobs=[0-9].*/)  c++ ; } END {print c}'

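The awk one-liner above counts Torque nodes that are in the 'free' state and carry no jobs attribute. The Python sketch below applies roughly the same rule so the intent is easier to follow; the pbsnodes output layout assumed here (one attribute block per node, separated by blank lines) is an assumption and is not taken from the script itself.

# Sketch only: count nodes reported as free and without running jobs, roughly
# the rule the awk one-liner in checknodes applies. The sample pbsnodes output
# format is assumed, not authoritative.
def count_free_nodes(pbsnodes_output):
    free = 0
    for block in pbsnodes_output.strip().split("\n\n"):
        attrs = [line.strip() for line in block.splitlines()]
        is_free = any(a.startswith("state = ") and "free" in a for a in attrs)
        has_jobs = any(a.startswith("jobs =") or a.startswith("jobs=") for a in attrs)
        if is_free and not has_jobs:
            free += 1
    return free

SAMPLE = """node01
     state = free
     np = 8

node02
     state = job-exclusive
     jobs = 0/123.server
"""

print(count_free_nodes(SAMPLE))   # -> 1
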
+ 580 - 0
common/src/contrib/hod/bin/hod

@@ -0,0 +1,580 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+original_dir=$PWD
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+else
+    exec python -u -OO $base_name ${1+"$@"} --hod.original-dir $work_dir
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re, pwd, threading
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.Hod.hod import hodRunner
+from hodlib.Common.setup import *
+from hodlib.Common.descGenerator import *
+from hodlib.Common.util import local_fqdn, need_to_allocate, filter_warnings,\
+    get_exception_error_string, hodInterrupt, \
+    HOD_INTERRUPTED_MESG, HOD_INTERRUPTED_CODE,\
+    TORQUE_USER_LIMITS_COMMENT_FIELD
+from hodlib.Common.tcp import tcpError, tcpSocket
+from hodlib.Hod.hod import hodHelp
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+
+VERSION = None
+if os.path.exists("./VERSION"):
+  vFile = open("./VERSION", 'r')
+  VERSION = vFile.readline()
+  vFile.close()
+
+# Always look for hodrc file here unless otherwise specified with -c:   
+DEFAULT_LOC = os.path.join(rootDirectory, 'conf')
+DEFAULT_HOD_DIR = os.path.join(os.environ['HOME'], ".hod")
+
+if not os.path.isdir(DEFAULT_HOD_DIR):
+  os.mkdir(DEFAULT_HOD_DIR, 0777)
+
+DEFAULT_CONFIG = os.path.join(DEFAULT_HOD_DIR, 'hodrc')
+if not os.path.exists(DEFAULT_CONFIG):
+  if os.environ.has_key('HOD_CONF_DIR') and os.environ['HOD_CONF_DIR'] is not None:
+    DEFAULT_CONFIG = os.path.join(os.environ['HOD_CONF_DIR'], 'hodrc')
+
+# Definition tuple is of the form:
+#  (name, type, description, help?, default value, required?, validate?, 
+#   short option)
+#
+defList = { 'hod' : (      
+             ('original-dir', 'directory', 'hod original start directory',
+              False, None, True, True, 'r'),
+
+             ('clusterdir', 'directory', 
+             'Directory where cluster state information and hadoop-site.xml' +
+             ' will be stored.',
+              True, None, False, False, 'd'),
+
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True, 'y'),
+              
+             ('java-home', 'directory', 'Java home directory.',
+              True, None, True, True, 'j'),
+            
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              True, 3, True, True, 'b'),
+            
+             ('stream', 'bool', 'Output to stderr.',
+              False, True, False, True),
+
+             ('nodecount', 'pos_int', 
+              'Number of nodes to allocate at startup. ',
+              True, None, False, True, 'n'),
+
+             ('script', 'file', 'Hadoop script to execute.',
+              True, None, False, False, 's'), 
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True, 'u'),
+             
+             ('allocate-wait-time', 'pos_int', 
+              'Time to wait for cluster allocation.',
+              False, 300, True, True, 'e'),         
+              
+             ('operation', 'string',
+              'Initiate a hod operation. (help, allocate, deallocate ...)',
+              False, None, False, True, 'o'),
+             
+             ('cluster-factor', 'pos_float',
+              'The number of grid slots per machine', False, 1.9, False, True,
+              'x'),
+             
+             ('cluster', 'string', 'Name of cluster being used.',
+              False, None, True, True, 'w'),
+
+             ('proxy-xrs-address', 'address', 
+              'Address to Allocation Manager XML RPC proxy.',
+              False, None, False, True, 'p'),
+              
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+
+             ('client-params', 'keyval', 'Hadoop client xml key/value list',
+              True, None, False, True, 'C'), 
+
+             ('hadoop-ui-log-dir', 'directory', 'Directory to store Web UI Logs of Hadoop',
+              True, None, False, True),
+
+             ('temp-dir', 'directory', 'HOD temporary directories.',
+              False, None, True, False),
+
+             ('update-worker-info', 'bool', 'Specifies whether to update Worker Info after allocation',
+              False, False, False, True),
+
+             ('job-feasibility-attr', 'string', 'Specifies whether to check job feasibility - resource manager and/or scheduler limits, also gives the attribute value',
+              False, None, False, True),
+
+             ('title', 'string', 'Title for the current HOD allocation.',
+               True, "HOD", False, True, 'N'),
+
+             ('walltime', 'pos_int', 'Walltime in seconds for the current HOD allocation',
+              True, None, False, True, 'l'),
+
+             ('script-wait-time', 'pos_int', 'Specifies the time to wait before running the script. Used with the hod.script option.',
+              True, 10, False, True, 'W'),
+
+             ('log-rollover-count', 'pos_int', 'Specifies the number of rolled-over log files of HOD client. A zero value disables rollover.',
+              True, 5, False, True, 'L'),
+
+             ('job-status-query-interval', 'pos_int', 'Specifies the time between checking for job status', 
+              False, 30, False, True),
+
+             ('job-command-failure-interval', 'pos_int', 'Specifies the time between checking for failed job status or submission commands', 
+              False, 10, False, True),
+
+             ('job-status-query-failure-retries', 'pos_int', 'Specifies the number of times job status failure queries are retried', 
+              False, 3, False, True),
+
+             ('job-submission-failure-retries', 'pos_int', 'Specifies the number of times job submission failure queries are retried',
+              False, 3, False, True)),
+
+            'resource_manager' : (
+             ('id', 'string', 'Batch scheduler ID: torque|condor.',
+              False, None, True, True),
+             
+             ('pbs-user', 'user_account', 'User ID jobs are submitted under.',
+              False, None, False, True),
+              
+             ('pbs-account', 'string', 'User Account jobs are submitted under.',
+              True, None, False, False, 'A'),
+              
+             ('queue', 'string', 'Queue of the batch scheduler to query.',
+              True, 'batch', False, True, 'Q'),
+             
+             ('batch-home', 'directory', 'Scheduler installation directory.',
+              False, None, True, True),
+             
+             ('options', 'keyval', 'Options to pass to the scheduler.',
+              False, None, False, True),
+
+             ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.',
+              False, None, False, True)),
+                            
+            'ringmaster' : (
+             ('work-dirs', 'list', 'hod work directories',
+              False, None, True, False),
+
+             ('temp-dir', 'directory', 'Ringmaster temporary directory.',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, False),
+
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+              
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 4, True,   True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+              
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True),
+               
+             ('svcrgy-addr', 'address', 'Download HTTP address.',
+              False, None, False, False),             
+             
+             ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.',
+              True, None, False, False, 't'),
+
+             ('max-connect','pos_int','max connections allowed for a single tarball server',
+             False, 30, False, True),
+
+             ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness',
+             False, 120, False, True),
+
+             ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster',
+             False, 3600, False, True),
+
+             ('max-master-failures', 'pos_int', 
+              'Defines how many times a master can fail before' \
+              ' failing cluster allocation', False, 5, True, True),
+
+             ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring',
+             False, 1, False, True)),
+
+            'gridservice-mapred' : (
+             ('external', 'bool', "Connect to an already running MapRed?",
+              False, False, True, True),
+              
+             ('host', 'hostname', 'Mapred hostname.', 
+              False, 'localhost', False, False),
+
+             ('info_port', 'pos_int', 'Mapred info port.',
+              False, None, False, False),
+             
+             ('tracker_port', 'pos_int', 'Mapred job tracker port.',
+              False, None, False, False),
+                        
+             ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.',
+              False, None, False, False),
+
+             ('server-params', 'keyval', 'Hadoop xml key/value list',
+              True, None, False, True, 'M'),
+               
+             ('envs', 'keyval', 'environment to run this package in',
+              False, None, False, True),
+
+             ('final-server-params', 'keyval', 'Hadoop final xml key/val list',
+              False, None, False, True, 'F'),
+
+             ('pkgs', 'directory', "directory where the package is installed",
+              False, None, False, False)), 
+               
+               
+            'gridservice-hdfs' : (
+             ('external', 'bool', "Connect to an already running HDFS?",
+              False, False, True, True),
+             
+             ('host', 'hostname', 'HDFS hostname.', 
+              False, 'localhost', False, False),
+             
+             ('fs_port', 'pos_int', 'HDFS port.',
+              False, None, False, False),
+              
+             ('info_port', 'pos_int', 'HDFS info port.',
+              False, None, False, False), 
+             
+             ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.',
+              False, None, False, False),
+
+             ('server-params', 'keyval', 'Hadoop xml key/value list',
+              False, None, False, True, 'H'),
+
+             ('final-server-params', 'keyval', 'Hadoop final xml key/value list',
+              False, None, False, True, 'S'),
+           
+             ('envs', 'keyval', 'Environment in which to run this package.',
+              False, None, False, True),
+
+             ('pkgs', 'directory', "directory where the package is installed",
+              False, None, False, False)),           
+             
+             
+            'hodring' : (
+             ('temp-dir', 'list', 'hodring temporary directory.',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, False), 
+              
+             ('log-destination-uri', 'string', 
+              'URI to store logs to, local://some_path or '
+              + 'hdfs://host:port/some_path', 
+              False, None, False, True),
+
+             ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS',
+              False, None, False, False),
+              
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+          
+             ('java-home', 'directory', 'Java home directory.',
+              False, None, True, False),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 3, True, True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True),
+               
+             ('command', 'string', 'Command for hodring to run.',
+              False, None, False, True),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+               
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+              
+             ('hadoop-port-range', 'range', 'Hadoop port range n-m.',
+              False, None, True, True),  
+            
+             ('service-id', 'string', 'Service ID.',
+              False, None, False, True),
+              
+             ('download-addr', 'string', 'Download HTTP address.',
+              False, None, False, True),
+               
+             ('svcrgy-addr', 'address', 'Download HTTP address.',
+              False, None, False, True), 
+    
+             ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.',
+              False, None, False, True),
+
+             ('tarball-retry-initial-time', 'pos_float','Initial Retry time for tarball download',
+              False, 1, False, True),
+              
+             ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download',
+              False, 3, False, True),
+              
+             ('cmd-retry-initial-time', 'pos_float','Initial retry time for getting commands',
+              False, 2, False, True),
+             
+             ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands',
+              False, 2, False, True),
+
+             ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.',
+              False, '/mapredsystem', False, False))
+              }   
+
+defOrder = [ 'hod', 'ringmaster', 'hodring', 'resource_manager', 
+             'gridservice-mapred', 'gridservice-hdfs' ]
+
+def printErrors(msgs):
+  for msg in msgs:
+    print msg
+
+def op_requires_pkgs(config):
+  if config['hod'].has_key('operation'):
+    return config['hod']['operation'].startswith('allocate')
+  else:
+    return config['hod'].has_key('script')
+
+if __name__ == '__main__':  
+  try:
+    confDef = definition()
+    confDef.add_defs(defList, defOrder)
+    hodhelp = hodHelp()
+    usage = hodhelp.help()
+            
+    hodOptions = options(confDef, usage,
+                      VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG,
+                      name=myName )
+    # hodConfig is a dict like object, hodConfig[section][name]
+    try:
+      hodConfig = config(hodOptions['config'], configDef=confDef, 
+                       originalDir=hodOptions['hod']['original-dir'],
+                       options=hodOptions) 
+    except IOError, e:
+      print >>sys.stderr,"error: %s not found. Specify the path to the HOD configuration file, or define the environment variable %s under which a file named hodrc can be found." % (hodOptions['config'], 'HOD_CONF_DIR')
+      sys.exit(1)
+  
+    # Conditional validation
+    statusMsgs = []
+
+    if hodConfig.normalizeValue('gridservice-hdfs', 'external'):
+      # For external HDFS
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'fs_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'info_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'host'))
+    else:
+      hodConfig['gridservice-hdfs']['fs_port'] = 0 # Dummy
+      hodConfig['gridservice-hdfs']['info_port'] = 0 # Not used at all
+
+    if hodConfig.normalizeValue('gridservice-mapred', 'external'):
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'tracker_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'info_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'host'))
+    else:
+      hodConfig['gridservice-mapred']['tracker_port'] = 0 # Dummy
+      hodConfig['gridservice-mapred']['info_port'] = 0 # Not used at all
+
+    if len(statusMsgs) != 0:
+      for msg in statusMsgs:
+        print >>sys.stderr, msg
+      sys.exit(1)
+    # End of conditional validation
+
+    status = True
+    statusMsgs = []
+  
+    (status,statusMsgs) = hodConfig.verify()
+    if not status:
+      print >>sys.stderr,"error: bin/hod failed to start."
+      for msg in statusMsgs:
+        print >>sys.stderr,"%s" % (msg)
+      sys.exit(1)
+  
+    ## TODO : should move the dependency verification to hodConfig.verify
+    if hodConfig['hod'].has_key('operation') and \
+      hodConfig['hod'].has_key('script'):
+      print "Script operation is mutually exclusive with other HOD operations"
+      hodOptions.print_help(sys.stderr)
+      sys.exit(1)
+    
+    if 'operation' not in hodConfig['hod'] and 'script' not in hodConfig['hod']:
+      print "HOD requires at least a script or operation be specified."
+      hodOptions.print_help(sys.stderr)
+      sys.exit(1)    
+    
+    if hodConfig['gridservice-hdfs']['external']:
+      hdfsAddress = "%s:%s" % (hodConfig['gridservice-hdfs']['host'], 
+                               hodConfig['gridservice-hdfs']['fs_port'])
+  
+      hdfsSocket = tcpSocket(hdfsAddress)
+        
+      try:
+        hdfsSocket.open()
+        hdfsSocket.close()
+      except tcpError:
+        printErrors(hodConfig.var_error('hod', 'gridservice-hdfs', 
+          "Failed to open a connection to external hdfs address: %s." % 
+          hdfsAddress))
+        sys.exit(1)
+    else:
+      hodConfig['gridservice-hdfs']['host'] = 'localhost'
+  
+    if hodConfig['gridservice-mapred']['external']:
+      mapredAddress = "%s:%s" % (hodConfig['gridservice-mapred']['host'], 
+                                 hodConfig['gridservice-mapred']['tracker_port'])
+  
+      mapredSocket = tcpSocket(mapredAddress)
+        
+      try:
+        mapredSocket.open()
+        mapredSocket.close()
+      except tcpError:
+        printErrors(hodConfig.var_error('hod', 'gridservice-mapred', 
+          "Failed to open a connection to external mapred address: %s." % 
+          mapredAddress))
+        sys.exit(1)
+    else:
+      hodConfig['gridservice-mapred']['host'] = 'localhost'
+  
+    if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \
+      not hodConfig['gridservice-hdfs'].has_key('pkgs') and \
+      op_requires_pkgs(hodConfig):
+      printErrors(hodConfig.var_error('gridservice-hdfs', 'pkgs', 
+        "gridservice-hdfs.pkgs must be defined if ringmaster.hadoop-tar-ball "
+        + "is not defined."))
+      sys.exit(1)
+  
+    if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \
+      not hodConfig['gridservice-mapred'].has_key('pkgs') and \
+      op_requires_pkgs(hodConfig):
+      printErrors(hodConfig.var_error('gridservice-mapred', 'pkgs', 
+        "gridservice-mapred.pkgs must be defined if ringmaster.hadoop-tar-ball "
+        + "is not defined."))
+      sys.exit(1)
+  
+    if hodConfig['hodring'].has_key('log-destination-uri'):
+      if hodConfig['hodring']['log-destination-uri'].startswith('file://'):
+        pass
+      elif hodConfig['hodring']['log-destination-uri'].startswith('hdfs://'):
+        hostPort = hodConfig['hodring']['log-destination-uri'][7:].split("/")
+        hostPort = hostPort[0]
+        socket = tcpSocket(hostPort)
+        try:
+          socket.open()
+          socket.close()
+        except:
+          printErrors(hodConfig.var_error('hodring', 'log-destination-uri', 
+          "Unable to contact host/port specified in log destination uri: %s" % 
+          hodConfig['hodring']['log-destination-uri']))
+          sys.exit(1)
+      else:
+        printErrors(hodConfig.var_error('hodring', 'log-destination-uri', 
+          "The log destiniation uri must be of type local:// or hdfs://."))
+        sys.exit(1)
+  
+    if hodConfig['ringmaster']['workers_per_ring'] < 1:
+      printErrors(hodConfig.var_error('ringmaster', 'workers_per_ring',
+                "ringmaster.workers_per_ring must be a positive integer " +
+                "greater than or equal to 1"))
+      sys.exit(1)
+                        
+    ## TODO : end of the dependency verification that should move to hodConfig.verify
+      
+    hodConfig['hod']['base-dir'] = rootDirectory
+    hodConfig['hod']['user_state'] = DEFAULT_HOD_DIR
+  
+    dGen = DescGenerator(hodConfig)
+    hodConfig = dGen.initializeDesc()
+    
+    os.environ['JAVA_HOME'] = hodConfig['hod']['java-home']
+    
+    if hodConfig['hod']['debug'] == 4:
+      print ""
+      print "Using Python: %s" % sys.version
+      print ""
+   
+    hod = hodRunner(hodConfig)
+  
+    # Initiate signal handling
+    hodInterrupt.set_log(hod.get_logger())
+    hodInterrupt.init_signals()
+    # Interrupts set up. Now on we handle signals only when we wish to.
+  except KeyboardInterrupt:
+    print HOD_INTERRUPTED_MESG
+    sys.exit(HOD_INTERRUPTED_CODE)
+  
+  opCode = 0
+  try:
+    if hodConfig['hod'].has_key('script'):
+      opCode = hod.script()
+    else:  
+      opCode = hod.operation()
+  except Exception, e:
+    print "Uncaught Exception : %s" % e
+  finally:
+    sys.exit(opCode)

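When gridservice-hdfs or gridservice-mapred is marked external, bin/hod verifies that the configured host:port is reachable by opening and immediately closing a TCP connection through hodlib.Common.tcp.tcpSocket. The sketch below reproduces that probe with the standard socket module instead of the hodlib wrapper (whose implementation is not part of this excerpt); the address used is purely illustrative.

# Sketch only: a reachability probe in the spirit of the tcpSocket open/close
# check in bin/hod, written against the standard socket module rather than
# hodlib.Common.tcp. The host and port are illustrative.
import socket

def is_reachable(address, timeout=5.0):
    """Return True if a TCP connection to 'host:port' can be opened."""
    host, port = address.rsplit(":", 1)
    try:
        probe = socket.create_connection((host, int(port)), timeout)
        probe.close()
        return True
    except (socket.error, ValueError):
        return False

if __name__ == "__main__":
    # Mirrors the external gridservice-hdfs check: fail fast when the
    # configured fs_port cannot be reached.
    hdfs_address = "namenode.example.com:8020"   # illustrative
    if not is_reachable(hdfs_address):
        print("Failed to open a connection to external hdfs address: %s" % hdfs_address)
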
+ 183 - 0
common/src/contrib/hod/bin/hodcleanup

@@ -0,0 +1,183 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+original_dir=$PWD
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"}
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -u -OO $base_name ${1+"$@"}
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -u -OO $base_name ${1+"$@"}
+else
+    exec python -u -OO $base_name ${1+"$@"}
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re, pwd, threading, random, time, pprint, shutil
+from pprint import pformat
+from optparse import OptionParser
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.Common.threads import simpleCommand
+from hodlib.Common.util import local_fqdn, tar, filter_warnings,\
+                            get_exception_string, get_exception_error_string
+from hodlib.Common.logger import hodLog
+from hodlib.Common.logger import getLogger
+from hodlib.HodRing.hodRing import createMRSystemDirectoryManager
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+reHdfsURI = re.compile("(hdfs://.*?:\d+)(.*)")
+
+VERSION = None
+if os.path.exists("./VERSION"):
+  vFile = open("./VERSION", 'r')
+  VERSION = vFile.readline()
+  vFile.close()
+
+def __archive_logs(conf, log):
+  # need log-destination-uri, __hadoopLogDirs, temp-dir
+  status = True
+  logUri = conf['log-destination-uri']
+  hadoopLogDirs = conf['hadoop-log-dirs']
+  if logUri:
+    try:
+      if hadoopLogDirs:
+        date = time.localtime()
+        for logDir in hadoopLogDirs:
+          (head, tail) = os.path.split(logDir)
+          (head, logType) = os.path.split(head)
+          tarBallFile = "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % (
+            logType, local_fqdn(), date[0], date[1], date[2], date[3], 
+            date[4], date[5], random.randint(0,1000))
+          
+          if logUri.startswith('file://'):
+            tarBallFile = os.path.join(logUri[7:], 
+                                       tarBallFile)
+          else:
+            tarBallFile = os.path.join(conf['temp-dir'], tarBallFile)
+          
+          log.debug('archiving log files to: %s' % tarBallFile)
+          status = tar(tarBallFile, logDir, ['*',])
+          log.info('archive %s status: %s' % (tarBallFile, status))
+          if status and \
+            logUri.startswith('hdfs://'):
+            __copy_archive_to_dfs(conf, tarBallFile)
+            log.info("copying archive to dfs finished")
+        dict = {} 
+    except:
+      log.error(get_exception_string())
+      status = False
+  return status
+
+
+def __copy_archive_to_dfs(conf, archiveFile):
+  # need log-destination-uri, hadoopCommandstring and/or pkgs
+  hdfsURIMatch = reHdfsURI.match(conf['log-destination-uri'])
+  
+  (head, tail) = os.path.split(archiveFile)
+  destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], 'hod-logs', conf['service-id'], tail)
+  
+  log.info("copying archive %s to DFS %s ..." % (archiveFile, destFile))
+  
+  hadoopCmd = conf['hadoop-command-string']
+  if conf['pkgs']:
+    hadoopCmd = os.path.join(conf['pkgs'], 'bin', 'hadoop')
+
+  copyCommand = "%s dfs -fs %s -copyFromLocal %s %s" % (hadoopCmd, 
+    hdfsURIMatch.group(1), archiveFile, destFile)
+  
+  log.debug(copyCommand)
+  
+  copyThread = simpleCommand('hadoop', copyCommand)
+  copyThread.start()
+  copyThread.wait()
+  copyThread.join()
+  log.debug(pprint.pformat(copyThread.output()))
+  
+  os.unlink(archiveFile)
+
+def unpack():
+  parser = OptionParser()
+  option_list=["--log-destination-uri", "--hadoop-log-dirs", \
+          "--temp-dir", "--hadoop-command-string", "--pkgs", "--user-id", \
+          "--service-id", "--hodring-debug", "--hodring-log-dir", \
+          "--hodring-syslog-address", "--hodring-cleanup-list", \
+          "--jt-pid", "--mr-sys-dir", "--fs-name", "--hadoop-path"]
+  regexp = re.compile("^--")
+  for opt in option_list:
+    parser.add_option(opt,dest=regexp.sub("",opt),action="store")
+  option_list.append("--hodring-stream")
+  parser.add_option("--hodring-stream",dest="hodring-stream",metavar="bool",\
+                                                        action="store_true")
+  (options, args) = parser.parse_args()
+  _options= {}
+  _options['hodring'] = {}
+  for opt in dir(options):
+    if "--"+opt in option_list:
+      _options[opt] = getattr(options,opt)
+  if _options.has_key('hadoop-log-dirs') and _options['hadoop-log-dirs']:
+    _options['hadoop-log-dirs'] = _options['hadoop-log-dirs'].split(",")
+  if _options.has_key('hodring-syslog-address') and _options['hodring-syslog-address']:
+    _options['hodring']['syslog-address'] = \
+        _options['hodring-syslog-address'].split(':')
+  _options['hodring']['debug']        = int(_options['hodring-debug'])
+  _options['hodring']['log-dir']      = _options['hodring-log-dir']
+  _options['hodring']['stream']      = _options['hodring-stream']
+  _options['hodring']['userid']      = _options['user-id']
+  os.putenv('PBS_JOBID', _options['service-id'] )
+  return _options
+ 
+if __name__ == '__main__':  
+  log = None
+  try:
+    conf = unpack()
+    # Use the same log as hodring
+    log = getLogger(conf['hodring'],'hodring')
+    log.debug("Logger initialised successfully")
+    mrSysDirManager = createMRSystemDirectoryManager(conf, log)
+    if mrSysDirManager is not None:
+      mrSysDirManager.removeMRSystemDirectory()
+
+    status =  __archive_logs(conf,log)
+    log.info("Archive status : %s" % status)
+    list = conf['hodring-cleanup-list'].split(',')
+    log.info("now removing %s" % list)
+    for dir in list:
+     if os.path.exists(dir):
+       log.debug('removing %s' % (dir))
+       shutil.rmtree(dir, True)
+       log.debug("done")
+    log.info("Cleanup successfully completed")
+  except Exception, e:
+    if log:
+      log.info("Stack trace:\n%s\n%s" %(get_exception_error_string(),get_exception_string()))

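__archive_logs above names each tarball as <logType>-<fqdn>-<YYYYMMDDhhmmss>-<random>.tar.gz, tars the log directory, and, when the destination is an hdfs:// URI, copies the archive with 'hadoop dfs -fs ... -copyFromLocal'. Below is a small sketch of just the filename construction, using socket.getfqdn() as a stand-in for hodlib's local_fqdn() helper.

# Sketch only: builds the archive name the same way __archive_logs does,
# "<logType>-<fqdn>-<YYYYMMDDhhmmss>-<random>.tar.gz". socket.getfqdn()
# stands in for hodlib's local_fqdn().
import random
import socket
import time

def archive_name(log_type):
    date = time.localtime()
    return "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % (
        log_type, socket.getfqdn(), date[0], date[1], date[2],
        date[3], date[4], date[5], random.randint(0, 1000))

print(archive_name("userlogs"))
# e.g. userlogs-node17.example.com-20110611143005-482.tar.gz
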
+ 290 - 0
common/src/contrib/hod/bin/hodring

@@ -0,0 +1,290 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"}
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -OO $base_name ${1+"$@"}
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -OO $base_name ${1+"$@"}
+else
+    exec python -OO $base_name ${1+"$@"}
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re
+
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.HodRing.hodRing import HodRing
+from hodlib.Common.setup import *
+from hodlib.Common.util import filter_warnings, get_exception_string, \
+                get_exception_error_string, getMapredSystemDirectory, \
+                to_http_url, local_fqdn
+from hodlib.Common.logger import getLogger, ensureLogDir
+from hodlib.Common.xmlrpc import hodXRClient
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+
+VERSION = '$HeadURL$'
+
+reMatch = reVersion.match(VERSION)
+if reMatch:
+    VERSION = reMatch.group(1)
+    VERSION = re.sub("_", ".", VERSION)
+else:
+    VERSION = 'DEV'
+
+# Definition tuple is of the form:
+#  (name, type, description, help?, default value, required?, validate?)
+#
+defList = { 'hodring' : (
+             ('temp-dir', 'directory', 'hod work directories',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, True), 
+
+             ('log-destination-uri', 'string', 
+              'URI to store logs to, local://some_path or '
+              + 'hdfs://host:port/some_path', 
+              False, None, False, True), 
+
+             ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS',
+              False, None, False, True),
+              
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+          
+             ('java-home', 'directory', 'Java home directory.',
+              False, None, True, True),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 3, True, True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, None, True, False),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+               
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+      
+             ('hadoop-port-range', 'range', 'Hadoop port range n-m.',
+              False, None, True, True),
+               
+             ('command', 'string', 'Command for hodring to run.',
+              False, None, False, True),
+              
+             ('service-id', 'string', 'Service ID.',
+              False, None, False, True),
+              
+             ('download-addr', 'string', 'Download HTTP address.',
+              False, None, False, True),
+               
+             ('svcrgy-addr', 'address', 'Service registry XMLRPC address.',
+              False, None, True, True), 
+    
+             ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.',
+              False, None, False, True),
+ 
+             ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download',
+              False, 1, False, True),
+              
+             ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download',
+              False, 3, False, True),
+              
+             ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands',
+              False, 2, False, True),
+             
+             ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands',
+              False, 2, False, True), 
+
+             ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.',
+              False, '/mapredsystem', False, False))
+            }            
+
+if __name__ == '__main__':
+
+  confDef = definition()
+  confDef.add_defs(defList)
+  hodRingOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION)
+  ensureLogDir(hodRingOptions['hodring']['log-dir'])
+  service = None
+  try:
+    (status, statusMsgs) = hodRingOptions.verify()
+    if not status:
+      raise Exception("%s" % statusMsgs)
+    hodRingOptions['hodring']['base-dir'] = rootDirectory
+    service = HodRing(hodRingOptions)
+    service.start()
+    service.wait()
+   
+    if service.log:
+      log = service.log
+    else: 
+      log = getLogger(hodRingOptions['hodring'],'hodring')
+
+    list = []
+    
+    runningHadoops = service.getRunningValues()
+
+    mrSysDirManager = None      
+    for cmd in runningHadoops:
+      if cmd.name == 'jobtracker':
+        mrSysDirManager = cmd.getMRSystemDirectoryManager()
+      log.debug("addding %s to cleanup list..." % cmd)
+      cmd.addCleanup(list)
+    
+    list.append(service.getTempDir())
+    log.debug(list)
+       
+    # archive_logs now
+    cmdString = os.path.join(rootDirectory, "bin", "hodcleanup") # same python
+
+    if (len(runningHadoops) == 0):
+      log.info("len(runningHadoops) == 0, No running cluster?")
+      log.info("Skipping __copy_archive_to_dfs")
+      hadoopString = ""
+    else: hadoopString=runningHadoops[0].path
+
+    #construct the arguments
+    if hodRingOptions['hodring'].has_key('log-destination-uri'):
+      cmdString = cmdString + " --log-destination-uri " \
+                    + hodRingOptions['hodring']['log-destination-uri']
+
+    hadoopLogDirs = service.getHadoopLogDirs()
+    if hadoopLogDirs:
+      cmdString = cmdString \
+                    + " --hadoop-log-dirs " \
+                    + ",".join(hadoopLogDirs)
+
+    cmdString = cmdString \
+                  + " --temp-dir " \
+                  + service._cfg['temp-dir'] \
+                  + " --hadoop-command-string " \
+                  + hadoopString \
+                  + " --user-id " \
+                  + service._cfg['userid'] \
+                  + " --service-id " \
+                  + service._cfg['service-id'] \
+                  + " --hodring-debug " \
+                  + str(hodRingOptions['hodring']['debug']) \
+                  + " --hodring-log-dir " \
+                  + hodRingOptions['hodring']['log-dir'] \
+                  + " --hodring-cleanup-list " \
+                  + ",".join(list)
+
+    if hodRingOptions['hodring'].has_key('syslog-address'):
+      syslogAddr = hodRingOptions['hodring']['syslog-address'][0] + \
+                   ':' + str(hodRingOptions['hodring']['syslog-address'][1])
+      cmdString = cmdString + " --hodring-syslog-address " + syslogAddr
+    if service._cfg.has_key('pkgs'):
+      cmdString = cmdString + " --pkgs " + service._cfg['pkgs']
+
+    if mrSysDirManager is not None:
+      cmdString = "%s %s" % (cmdString, mrSysDirManager.toCleanupArgs())
+
+    log.info("cleanup commandstring : ")
+    log.info(cmdString)
+
+    # clean up
+    cmd = ['/bin/sh', '-c', cmdString]
+
+    mswindows = (sys.platform == "win32")
+    originalcwd = os.getcwd()
+
+    if not mswindows:
+      try: 
+        pid = os.fork() 
+        if pid > 0:
+          # exit first parent
+          log.info("child(pid: %s) is now doing cleanup" % pid)
+          sys.exit(0) 
+      except OSError, e: 
+        log.error("fork failed: %d (%s)" % (e.errno, e.strerror)) 
+        sys.exit(1)
+
+      # decouple from parent environment
+      os.chdir("/") 
+      os.setsid() 
+      os.umask(0) 
+ 
+    MAXFD = 128 # more than enough file descriptors to close. Just in case.
+    for i in xrange(0, MAXFD):
+      try:
+        os.close(i)
+      except OSError:
+        pass
+  
+    try:
+      os.execvp(cmd[0], cmd)
+    finally:
+      log.critical("exec failed")
+      os._exit(1)
+
+  except Exception, e:
+    if service:
+      if service.log:
+        log = service.log
+    else:
+      log = getLogger(hodRingOptions['hodring'], 'hodring')
+    log.error("Error in bin/hodring %s. \nStack trace:\n%s" %(get_exception_error_string(),get_exception_string()))
+    
+    log.info("now trying informing to ringmaster")
+    log.info(hodRingOptions['hodring']['ringmaster-xrs-addr'])
+    log.info(hodRingOptions.normalizeValue('hodring', 'ringmaster-xrs-addr'))
+    log.info(to_http_url(hodRingOptions.normalizeValue( \
+            'hodring', 'ringmaster-xrs-addr')))
+    # Report errors to the Ringmaster if possible
+    try:
+      ringXRAddress = to_http_url(hodRingOptions.normalizeValue( \
+                                     'hodring', 'ringmaster-xrs-addr'))
+      log.debug("Creating ringmaster XML-RPC client.")
+      ringClient = hodXRClient(ringXRAddress)    
+      if ringClient is not None:
+        addr = local_fqdn() + "_" + str(os.getpid())
+        ringClient.setHodRingErrors(addr, str(e))
+        log.info("Reported errors to ringmaster at %s" % ringXRAddress)
+    except Exception, e:
+      log.error("Failed to report errors to ringmaster at %s" % ringXRAddress)
+      log.error("Reason : %s" % get_exception_string())
+    # End of reporting errors to the client

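At the end of its run, bin/hodring detaches a child process to perform the cleanup: it forks and lets the parent exit, calls setsid(), resets the umask, closes inherited file descriptors, and finally execs the hodcleanup command assembled above. The sketch below condenses that detach-and-exec sequence; the command passed in is a harmless placeholder, not the real hodcleanup invocation.

# Sketch only: the detach-and-exec pattern used at the end of bin/hodring,
# reduced to its essentials. The command is a placeholder; hodring execs the
# hodcleanup command string it assembled beforehand.
import os
import sys

def detach_and_exec(argv):
    if os.fork() > 0:
        sys.exit(0)            # parent returns immediately, child carries on
    os.chdir("/")              # decouple from the parent environment
    os.setsid()
    os.umask(0)
    for fd in range(0, 128):   # close inherited descriptors, as hodring does
        try:
            os.close(fd)
        except OSError:
            pass
    os.execvp(argv[0], argv)   # never returns on success

if __name__ == "__main__":
    detach_and_exec(["/bin/sh", "-c", "echo cleanup >> /tmp/hodring-cleanup.log"])
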
Too many files were changed in this changeset, so some files are not shown.