
HADOOP-7106. Reorganize SVN layout to combine HDFS, Common, and MR in a single tree (project unsplit)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-279@1134994 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon, 14 years ago
Commit
7dc7c7a7dd
100 changed files with 21,830 additions and 0 deletions
  1. 44 0
      common/.gitignore
  2. 10635 0
      common/CHANGES.txt
  3. 244 0
      common/LICENSE.txt
  4. 2 0
      common/NOTICE.txt
  5. 31 0
      common/README.txt
  6. 123 0
      common/bin/hadoop
  7. 331 0
      common/bin/hadoop-config.sh
  8. 167 0
      common/bin/hadoop-daemon.sh
  9. 34 0
      common/bin/hadoop-daemons.sh
  10. 99 0
      common/bin/rcc
  11. 65 0
      common/bin/slaves.sh
  12. 36 0
      common/bin/start-all.sh
  13. 37 0
      common/bin/stop-all.sh
  14. 1769 0
      common/build.xml
  15. 24 0
      common/conf/configuration.xsl
  16. 8 0
      common/conf/core-site.xml.template
  17. 54 0
      common/conf/hadoop-env.sh.template
  18. 72 0
      common/conf/hadoop-metrics.properties
  19. 16 0
      common/conf/hadoop-metrics2.properties.example
  20. 106 0
      common/conf/hadoop-policy.xml.template
  21. 149 0
      common/conf/log4j.properties
  22. 1 0
      common/conf/masters.template
  23. 1 0
      common/conf/slaves.template
  24. 57 0
      common/conf/ssl-client.xml.example
  25. 55 0
      common/conf/ssl-server.xml.example
  26. 261 0
      common/ivy.xml
  27. 42 0
      common/ivy/hadoop-common-instrumented-template.xml
  28. 151 0
      common/ivy/hadoop-common-template.xml
  29. 43 0
      common/ivy/hadoop-common-test-template.xml
  30. 50 0
      common/ivy/ivysettings.xml
  31. 62 0
      common/ivy/libraries.properties
  32. 11 0
      common/lib/jdiff/hadoop-core_0.20.0.xml
  33. 11 0
      common/lib/jdiff/hadoop-core_0.21.0.xml
  34. 11 0
      common/lib/jdiff/hadoop_0.17.0.xml
  35. 11 0
      common/lib/jdiff/hadoop_0.18.1.xml
  36. 11 0
      common/lib/jdiff/hadoop_0.18.2.xml
  37. 11 0
      common/lib/jdiff/hadoop_0.18.3.xml
  38. 11 0
      common/lib/jdiff/hadoop_0.19.0.xml
  39. 11 0
      common/lib/jdiff/hadoop_0.19.1.xml
  40. 11 0
      common/lib/jdiff/hadoop_0.19.2.xml
  41. 11 0
      common/lib/jdiff/hadoop_0.20.0.xml
  42. 11 0
      common/lib/jdiff/hadoop_0.20.1.xml
  43. 11 0
      common/lib/jdiff/hadoop_0.20.2.xml
  44. 11 0
      common/src/contrib/bash-tab-completion/README
  45. 121 0
      common/src/contrib/bash-tab-completion/hadoop.sh
  46. 305 0
      common/src/contrib/build-contrib.xml
  47. 64 0
      common/src/contrib/build.xml
  48. 15 0
      common/src/contrib/ec2/README.txt
  49. 71 0
      common/src/contrib/ec2/bin/cmd-hadoop-cluster
  50. 80 0
      common/src/contrib/ec2/bin/create-hadoop-image
  51. 60 0
      common/src/contrib/ec2/bin/delete-hadoop-cluster
  52. 65 0
      common/src/contrib/ec2/bin/hadoop-ec2
  53. 93 0
      common/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
  54. 171 0
      common/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh
  55. 80 0
      common/src/contrib/ec2/bin/image/create-hadoop-image-remote
  56. 63 0
      common/src/contrib/ec2/bin/image/ec2-run-user-data
  57. 42 0
      common/src/contrib/ec2/bin/launch-hadoop-cluster
  58. 119 0
      common/src/contrib/ec2/bin/launch-hadoop-master
  59. 59 0
      common/src/contrib/ec2/bin/launch-hadoop-slaves
  60. 33 0
      common/src/contrib/ec2/bin/list-hadoop-clusters
  61. 48 0
      common/src/contrib/ec2/bin/terminate-hadoop-cluster
  62. 97 0
      common/src/contrib/failmon/README
  63. 54 0
      common/src/contrib/failmon/bin/failmon.sh
  64. 235 0
      common/src/contrib/failmon/bin/scheduler.py
  65. 120 0
      common/src/contrib/failmon/build.xml
  66. 25 0
      common/src/contrib/failmon/conf/commons-logging.properties
  67. 80 0
      common/src/contrib/failmon/conf/failmon.properties
  68. 39 0
      common/src/contrib/failmon/conf/global.config
  69. 10 0
      common/src/contrib/failmon/conf/hosts.list
  70. 40 0
      common/src/contrib/failmon/conf/log4j.properties
  71. 52 0
      common/src/contrib/failmon/ivy.xml
  72. 17 0
      common/src/contrib/failmon/ivy/libraries.properties
  73. 154 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Anonymizer.java
  74. 101 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/CPUParser.java
  75. 41 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Continuous.java
  76. 486 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Environment.java
  77. 151 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/EventRecord.java
  78. 120 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Executor.java
  79. 154 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HDFSMerger.java
  80. 136 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HadoopLogParser.java
  81. 282 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LocalStore.java
  82. 214 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LogParser.java
  83. 43 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/MonitorJob.java
  84. 53 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Monitored.java
  85. 140 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/NICParser.java
  86. 132 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/OfflineAnonymizer.java
  87. 163 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/PersistentState.java
  88. 120 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/RunOnce.java
  89. 206 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SMARTParser.java
  90. 112 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SensorsParser.java
  91. 163 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SerializedRecord.java
  92. 102 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/ShellParser.java
  93. 126 0
      common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SystemLogParser.java
  94. 272 0
      common/src/contrib/hod/CHANGES.txt
  95. 104 0
      common/src/contrib/hod/README
  96. 1 0
      common/src/contrib/hod/bin/VERSION
  97. 31 0
      common/src/contrib/hod/bin/checknodes
  98. 580 0
      common/src/contrib/hod/bin/hod
  99. 183 0
      common/src/contrib/hod/bin/hodcleanup
  100. 290 0
      common/src/contrib/hod/bin/hodring

+ 44 - 0
common/.gitignore

@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*~
+.classpath
+.project
+.settings
+.svn
+build/
+build-fi/
+build.properties
+conf/masters
+conf/slaves
+conf/hadoop-env.sh
+conf/hadoop-site.xml
+conf/core-site.xml
+conf/mapred-site.xml
+conf/hdfs-site.xml
+conf/hadoop-policy.xml
+conf/capacity-scheduler.xml
+conf/mapred-queue-acls.xml
+docs/api/
+ivy/hadoop-core.xml
+ivy/hadoop-core-test.xml
+ivy/ivy-*.jar
+ivy/maven-ant-tasks-*.jar
+logs/
+src/contrib/ec2/bin/hadoop-ec2-env.sh
+src/docs/build
+src/docs/cn/build
+src/docs/cn/src/documentation/sitemap.xmap
+src/docs/cn/uming.conf

+ 10635 - 0
common/CHANGES.txt

@@ -0,0 +1,10635 @@
+Hadoop Change Log
+
+Trunk (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+   HADOOP-6904. Support method based RPC compatibility. (hairong)
+
+  NEW FEATURES
+
+    HADOOP-7342. Add a utility API in FileUtil for JDK File.list to
+    avoid NPEs on File.list() (Bharath Mundlapudi via mattf)
+
+    HADOOP-7322. Adding a util method in FileUtil for directory listing to
+    avoid NPEs on File.listFiles() (Bharath Mundlapudi via mattf)
+
+    HADOOP-6994. Api to get delegation token in AbstractFileSystem. (jitendra)
+
+    HADOOP-7171. Support UGI in FileContext API. (jitendra)
+
+    HADOOP-7257 Client side mount tables (sanjay)
+
+  IMPROVEMENTS
+
+    HADOOP-7133. Batch the calls in DataStorage to FileUtil.createHardLink().
+    (Matt Foley via jghoman)
+
+    HADOOP-7054. Change NN LoadGenerator to use FileContext APIs.
+    (Sanjay Radia)
+
+    HADOOP-7175. Add isEnabled() to Trash.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7180. Better support on CommandFormat on the API and exceptions.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7202. Improve shell Command base class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7224. Add CommandFactory to shell.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7230. Move "fs -help" shell command tests from HDFS to COMMON; see
+    also HDFS-1844.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7233. Refactor ls to conform to new FsCommand class.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7235. Refactor the tail command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7227. Remove protocol version check at proxy creation in Hadoop
+    RPC. (jitendra)
+
+    HADOOP-7236. Refactor the mkdir command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7114. FsShell should dump all exceptions at DEBUG level.
+    (todd via tomwhite)
+
+    HADOOP-7250. Refactor the setrep command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7249. Refactor the chmod/chown/chgrp command to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7251. Refactor the getmerge command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7265. Keep track of relative paths in PathData.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7238. Refactor the cat and text commands to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7271. Standardize shell command error messages.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7275. Refactor the stat command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7237. Refactor the touchz command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7267. Refactor the rm/rmr/expunge commands to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7285. Refactor the test command to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7289. In ivy.xml, test conf should not extend common conf.
+    (Eric Yang via szetszwo)
+
+    HADOOP-7286. Refactor the du/dus/df commands to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7320. Refactor the copy and move commands to conform to new
+    FsCommand class. (Daryn Sharp via todd)
+
+    HADOOP-7001.  Configuration changes can occur via the Reconfigurable
+    interface. (Patrick Kline via dhruba)
+
+    HADOOP-7331. Make hadoop-daemon.sh return exit code 1 if daemon processes
+    did not get started. (Tanping Wang via todd)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-7223. FileContext createFlag combinations are not clearly defined.
+    (suresh)
+
+    HADOOP-7215. RPC clients must use network interface corresponding to 
+    the host in the client's kerberos principal key. (suresh)
+
+    HADOOP-7268. FileContext.getLocalFSFileContext() behavior needs to be 
+    fixed w.r.t tokens. (jitendra)
+
+    HADOOP-7216. Add FsCommand.runAll() with deprecated annotation for the
+    transition of Command base class improvement.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7207. fs member of FSShell is not really needed (boryas)
+
+    HADOOP-7231. Fix synopsis for -count. (Daryn Sharp via eli).
+
+    HADOOP-6920, HADOOP-7292, HADOOP-7306. Porting bugfixes portion of the 
+    three patches to yahoo-merge branch.
+
+    HADOOP-7204. remove local unused fs variable from CmdHandler 
+    and FsShellPermissions.changePermissions (boryas)
+
+    HADOOP-7210. Chown command is not working from FSShell
+    (Uma Maheswara Rao G via todd)
+
+    HADOOP-7282. ipc.Server.getRemoteIp() may return null.  (John George
+    via szetszwo)
+
+    HADOOP-7336. TestFileContextResolveAfs will fail with default 
+    test.build.data property. (jitendra)
+
+    HADOOP-7284 Trash and shell's rm does not work for viewfs (Sanjay Radia)
+
+    HADOOP-7287. Configuration deprecation mechanism doesn't work properly for
+    GenericOptionsParser and Tools. (Aaron T. Myers via todd)
+
+Release 0.22.0 - Unreleased
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+    HADOOP-6791.  Refresh for proxy superuser config
+    (common part for HDFS-1096) (boryas)
+
+    HADOOP-6581. Add authenticated TokenIdentifiers to UGI so that 
+    they can be used for authorization (Kan Zhang and Jitendra Pandey 
+    via jghoman)
+
+    HADOOP-6584. Provide Kerberized SSL encryption for webservices.
+    (jghoman and Kan Zhang via jghoman)
+
+    HADOOP-6853. Common component of HDFS-1045. (jghoman)
+
+    HADOOP-6859 - Introduce additional statistics to FileSystem to track 
+    file system operations (suresh)
+
+    HADOOP-6870. Add a new API getFiles to FileSystem and FileContext that
+    lists all files under the input path or the subtree rooted at the
+    input path if recursive is true. Block locations are returned together
+    with each file's status. (hairong)
+
+    HADOOP-6888. Add a new FileSystem API closeAllForUGI(..) for closing all
+    file systems associated with a particular UGI.  (Devaraj Das and Kan Zhang
+    via szetszwo)
+
+    HADOOP-6892. Common component of HDFS-1150 (Verify datanodes' identities 
+    to clients in secure clusters) (jghoman)
+
+    HADOOP-6889. Make RPC to have an option to timeout. (hairong)
+
+    HADOOP-6996. Allow CodecFactory to return a codec object given a codec's
+    class name. (hairong)
+
+    HADOOP-7171. Support UGI in the FileContext. (jitendra)
+
+    HADOOP-6854. Options.createOpts should provide API to access Progress.
+    (Krishna Ramachandran via jitendra)
+
+    HADOOP-7013. Add boolean field isCorrupt to BlockLocation. 
+    (Patrick Kling via hairong)
+
+    HADOOP-6978. Adds support for NativeIO using JNI. 
+    (Todd Lipcon, Devaraj Das & Owen O'Malley via ddas)
+
+  IMPROVEMENTS
+
+    HADOOP-6644. util.Shell getGROUPS_FOR_USER_COMMAND method name 
+    should use common naming convention (boryas)
+
+    HADOOP-6778. add isRunning() method to 
+    AbstractDelegationTokenSecretManager (for HDFS-1044) (boryas)
+
+    HADOOP-6633. normalize property names for JT/NN kerberos principal 
+    names in configuration (boryas)
+
+    HADOOP-6627. "Bad Connection to FS" message in FSShell should print 
+    message from the exception (boryas)
+
+    HADOOP-6600. mechanism for authorization check for inter-server 
+    protocols. (boryas)
+
+    HADOOP-6623. Add StringUtils.split for non-escaped single-character
+    separator. (Todd Lipcon via tomwhite)
+
+    HADOOP-6761. The Trash Emptier has the ability to run more frequently.
+    (Dmytro Molkov via dhruba)
+
+    HADOOP-6714. Resolve compressed files using CodecFactory in FsShell::text.
+    (Patrick Angeles via cdouglas)
+
+    HADOOP-6661. User document for UserGroupInformation.doAs. 
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6674. Makes use of the SASL authentication options in the
+    SASL RPC. (Jitendra Pandey via ddas)
+
+    HADOOP-6526. Need mapping from long principal names to local OS 
+    user names. (boryas)
+
+    HADOOP-6814. Adds an API in UserGroupInformation to get the real
+    authentication method of a passed UGI. (Jitendra Pandey via ddas)
+
+    HADOOP-6756. Documentation for common configuration keys.
+    (Erik Steffl via shv)
+
+    HADOOP-6835. Add support for concatenated gzip input. (Greg Roelofs via
+    cdouglas)
+
+    HADOOP-6845. Renames the TokenStorage class to Credentials. 
+    (Jitendra Pandey via ddas)
+
+    HADOOP-6826. FileStatus needs unit tests. (Rodrigo Schmidt via Eli
+    Collins)
+
+    HADOOP-6905. add buildDTServiceName method to SecurityUtil 
+    (as part of MAPREDUCE-1718)  (boryas)
+
+    HADOOP-6632. Adds support for using different keytabs for different
+    servers in a Hadoop cluster. In the earlier implementation, all servers
+    of a certain type (like TaskTracker), would have the same keytab and the
+    same principal. Now the principal name is a pattern that has _HOST in it.
+    (Kan Zhang & Jitendra Pandey via ddas)
+
+    HADOOP-6861. Adds new non-static methods in Credentials to read and 
+    write token storage file. (Jitendra Pandey & Owen O'Malley via ddas)
+
+    HADOOP-6877. Common part of HDFS-1178 (NameNode servlets should communicate
+    with NameNode directly). (Kan Zhang via jghoman)
+    
+    HADOOP-6475. Adding some javadoc to Server.RpcMetrics, UGI. 
+    (Jitendra Pandey and borya via jghoman)
+
+    HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs 
+    periodically. (Owen O'Malley and ddas via ddas)
+
+    HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong)
+
+    HADOOP-6862. Adds api to add/remove user and group to AccessControlList
+    (amareshwari)
+
+    HADOOP-6911. doc update for DelegationTokenFetcher (boryas)
+
+    HADOOP-6900. Make the iterator returned by FileSystem#listLocatedStatus to 
+    throw IOException rather than RuntimeException when there is an IO error
+    fetching the next file. (hairong)
+
+    HADOOP-6905. Better logging messages when a delegation token is invalid.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6693. Add metrics to track kerberos login activity. (suresh)
+
+    HADOOP-6803. Add native gzip read/write coverage to TestCodec.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6950. Suggest that HADOOP_CLASSPATH should be preserved in 
+    hadoop-env.sh.template. (Philip Zeyliger via Eli Collins)
+
+    HADOOP-6922. Make AccessControlList a writable and update documentation
+    for Job ACLs.  (Ravi Gummadi via vinodkv)
+
+    HADOOP-6965. Introduces checks for whether the original tgt is valid 
+    in the reloginFromKeytab method.
+
+    HADOOP-6856. Simplify constructors for SequenceFile, and MapFile. (omalley)
+
+    HADOOP-6987. Use JUnit Rule to optionally fail test cases that run more
+    than 10 seconds (jghoman)
+
+    HADOOP-7005. Update test-patch.sh to remove callback to Hudson. (nigel)
+
+    HADOOP-6985. Suggest that HADOOP_OPTS be preserved in
+    hadoop-env.sh.template. (Ramkumar Vadali via cutting)
+
+    HADOOP-7007. Update the hudson-test-patch ant target to work with the
+    latest test-patch.sh script (gkesavan)
+
+    HADOOP-7010. Typo in FileSystem.java. (Jingguo Yao via eli)
+
+    HADOOP-7009. MD5Hash provides a public factory method that creates an
+    instance of thread local MessageDigest. (hairong)
+
+    HADOOP-7008. Enable test-patch.sh to have a configured number of acceptable 
+    findbugs and javadoc warnings. (nigel and gkesavan)
+
+    HADOOP-6818. Provides a JNI implementation of group resolution. (ddas)
+
+    HADOOP-6943. The GroupMappingServiceProvider interface should be public.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-4675. Current Ganglia metrics implementation is incompatible with
+    Ganglia 3.1. (Brian Bockelman via tomwhite)
+
+    HADOOP-6977. Herriot daemon clients should vend statistics (cos)
+
+    HADOOP-7024. Create a test method for adding file systems during tests.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6903. Make AbstractFileSystem methods and some FileContext methods public. (Sanjay Radia via Sanjay Radia)
+
+    HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash normalization. (eli)
+
+    HADOOP-7032. Assert type constraints in the FileStatus constructor. (eli)
+
+    HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper. (eli)
+
+    HADOOP-7028. ant eclipse does not include requisite ant.jar in the 
+    classpath. (Patrick Angeles via eli)
+
+    HADOOP-6864. Provide a JNI-based implementation of ShellBasedUnixGroupsNetgroupMapping 
+    (implementation of GroupMappingServiceProvider) (Erik Steffl via boryas)
+
+    HADOOP-7187. Fix socket leak in GangliaContext.  (Uma Maheswara Rao G
+    via szetszwo)
+
+    HADOOP-7241. fix typo of command 'hadoop fs -help tail'. 
+    (Wei Yongjun via eli)
+
+  OPTIMIZATIONS
+
+    HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..).
+    (Erik Steffl via szetszwo)
+
+    HADOOP-6683. ZlibCompressor does not fully utilize the buffer.
+    (Kang Xiao via eli)
+
+  BUG FIXES
+
+    HADOOP-6638. try to relogin in a case of failed RPC connection (expired 
+    tgt) only in case the subject is loginUser or proxyUgi.realUser. (boryas)
+
+    HADOOP-6781. security audit log shouldn't have exception in it. (boryas)
+
+    HADOOP-6612.  Protocols RefreshUserToGroupMappingsProtocol and 
+    RefreshAuthorizationPolicyProtocol will fail with security enabled (boryas)
+
+    HADOOP-6764. Remove verbose logging from the Groups class. (Boris Shkolnik)
+
+    HADOOP-6730. Bug in FileContext#copy and provide base class for 
+    FileContext tests. (Ravi Phulari via jghoman)
+
+    HADOOP-6669. Respect compression configuration when creating DefaultCodec
+    instances. (Koji Noguchi via cdouglas)
+
+    HADOOP-6747. TestNetUtils fails on Mac OS X. (Todd Lipcon via jghoman)
+
+    HADOOP-6787. Factor out glob pattern code from FileContext and
+    Filesystem. Also fix bugs identified in HADOOP-6618 and make the
+    glob pattern code less restrictive and more POSIX standard
+    compliant. (Luke Lu via eli)
+
+    HADOOP-6649.  login object in UGI should be inside the subject (jnp via 
+    boryas)
+
+    HADOOP-6687.   user object in the subject in UGI should be reused in case 
+    of a relogin. (jnp via boryas)
+
+    HADOOP-6603. Provide workaround for issue with Kerberos not resolving 
+    cross-realm principal (Kan Zhang and Jitendra Pandey via jghoman)
+
+    HADOOP-6620. NPE if renewer is passed as null in getDelegationToken.
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6613. Moves the RPC version check ahead of the AuthMethod check.
+    (Kan Zhang via ddas)
+
+    HADOOP-6682. NetUtils:normalizeHostName does not process hostnames starting
+    with [a-f] correctly. (jghoman)
+
+    HADOOP-6652. Removes the unnecessary cache from 
+    ShellBasedUnixGroupsMapping. (ddas)
+
+    HADOOP-6815. refreshSuperUserGroupsConfiguration should use server side 
+    configuration for the refresh (boryas)
+
+    HADOOP-6648. Adds a check for null tokens in Credentials.addToken api.
+    (ddas)
+ 
+    HADOOP-6647. balancer fails with "is not authorized for protocol 
+    interface NamenodeProtocol" in secure environment (boryas)
+
+    HADOOP-6834. TFile.append compares initial key against null lastKey
+    (hong tang via mahadev)
+
+    HADOOP-6670. Use the UserGroupInformation's Subject as the criteria for
+    equals and hashCode. (Owen O'Malley and Kan Zhang via ddas)
+
+    HADOOP-6536. Fixes FileUtil.fullyDelete() not to delete the contents of
+    the sym-linked directory. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6873. using delegation token over hftp for long 
+    running clients (boryas)
+
+    HADOOP-6706. Improves the sasl failure handling due to expired tickets,
+    and other server detected failures. (Jitendra Pandey and ddas via ddas)
+
+    HADOOP-6715. Fixes AccessControlList.toString() to return a descriptive
+    String representation of the ACL. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6885. Fix java doc warnings in Groups and 
+    RefreshUserMappingsProtocol. (Eli Collins via jghoman) 
+
+    HADOOP-6482. GenericOptionsParser constructor that takes Options and 
+    String[] ignores options. (Eli Collins via jghoman)
+
+    HADOOP-6906.  FileContext copy() utility doesn't work with recursive
+    copying of directories. (vinod k v via mahadev)
+
+    HADOOP-6453. Hadoop wrapper script shouldn't ignore an existing 
+    JAVA_LIBRARY_PATH. (Chad Metcalf via jghoman)
+
+    HADOOP-6932.  Namenode start (init) fails because of invalid kerberos 
+    key, even when security set to "simple" (boryas)
+
+    HADOOP-6913. Circular initialization between UserGroupInformation and 
+    KerberosName (Kan Zhang via boryas)
+
+    HADOOP-6907. Rpc client doesn't use the per-connection conf to figure
+    out server's Kerberos principal (Kan Zhang via hairong)
+
+    HADOOP-6938. ConnectionId.getRemotePrincipal() should check if security
+    is enabled. (Kan Zhang via hairong)
+
+    HADOOP-6930. AvroRpcEngine doesn't work with generated Avro code. 
+    (sharad)
+
+    HADOOP-6940. RawLocalFileSystem's markSupported method misnamed 
+    markSupport. (Tom White via eli).
+
+    HADOOP-6951.  Distinct minicluster services (e.g. NN and JT) overwrite each
+    other's service policies.  (Aaron T. Myers via tomwhite)
+
+    HADOOP-6879. Provide SSH based (Jsch) remote execution API for system
+    tests (cos)
+
+    HADOOP-6989. Correct the parameter for SetFile to set the value type
+    for SetFile to be NullWritable instead of the key. (cdouglas via omalley)
+
+    HADOOP-6984. Combine the compress kind and the codec in the same option
+    for SequenceFiles. (cdouglas via omalley)
+
+    HADOOP-6933. TestListFiles is flaky. (Todd Lipcon via tomwhite)
+
+    HADOOP-6947.  Kerberos relogin should set refreshKrb5Config to true.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-7006. Fix 'fs -getmerge' command to not be a no-op.
+    (Chris Nauroth via cutting)
+
+    HADOOP-6663.  BlockDecompressorStream get EOF exception when decompressing
+    the file compressed from empty file.  (Kang Xiao via tomwhite)
+
+    HADOOP-6991.  Fix SequenceFile::Reader to honor file lengths and call
+    openFile (cdouglas via omalley)
+
+    HADOOP-7011.  Fix KerberosName.main() to not throw an NPE.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-6975.  Integer overflow in S3InputStream for blocks > 2GB.
+    (Patrick Kling via tomwhite)
+
+    HADOOP-6758. MapFile.fix does not allow index interval definition.
+    (Gianmarco De Francisci Morales via tomwhite)
+
+    HADOOP-6926. SocketInputStream incorrectly implements read().
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6899 RawLocalFileSystem#setWorkingDir() does not work for relative names
+     (Sanjay Radia)
+
+    HADOOP-6496. HttpServer sends wrong content-type for CSS files
+    (and others). (Todd Lipcon via tomwhite)
+
+    HADOOP-7057. IOUtils.readFully and IOUtils.skipFully have typo in
+    exception creation's message. (cos)
+
+Release 0.21.1 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-6934. Test for ByteWritable comparator.
+    (Johannes Zillmann via Eli Collins)
+
+    HADOOP-6786. test-patch needs to verify Herriot integrity (cos)
+
+  BUG FIXES
+
+    HADOOP-6925. BZip2Codec incorrectly implements read(). 
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6833. IPC leaks call parameters when exceptions thrown.
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6971. Clover build doesn't generate per-test coverage (cos)
+
+    HADOOP-6993. Broken link on cluster setup page of docs. (eli)
+
+    HADOOP-6944. [Herriot] Implement a functionality for getting proxy users
+    definitions like groups and hosts. (Vinay Thota via cos)
+
+    HADOOP-6954.  Sources JARs are not correctly published to the Maven
+    repository. (tomwhite)
+
+    HADOOP-7052. misspelling of threshold in conf/log4j.properties.
+    (Jingguo Yao via eli)
+
+    HADOOP-7053. wrong FSNamesystem Audit logging setting in 
+    conf/log4j.properties. (Jingguo Yao via eli)
+
+    HADOOP-7162. Remove a duplicated call to FileSystem.listStatus(..) in FsShell.
+    (Alexey Diomin via szetszwo)
+
+    HADOOP-7117. Remove fs.checkpoint.* from core-default.xml and replace
+    fs.checkpoint.* with dfs.namenode.checkpoint.* in documentations.
+    (Harsh J Chouraria via szetszwo)
+
+    HADOOP-7193. Correct the "fs -touchz" command help message.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7174. Null is displayed in the "fs -copyToLocal" command.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7194. Fix resource leak in IOUtils.copyBytes(..).
+    (Devaraj K via szetszwo)
+
+Release 0.21.0 - 2010-08-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4895. Remove deprecated methods DFSClient.getHints(..) and
+    DFSClient.isDirectory(..).  (szetszwo)
+
+    HADOOP-4941. Remove deprecated FileSystem methods: getBlockSize(Path f),
+    getLength(Path f) and getReplication(Path src).  (szetszwo)
+
+    HADOOP-4648. Remove obsolete, deprecated InMemoryFileSystem and
+    ChecksumDistributedFileSystem.  (cdouglas via szetszwo)
+
+    HADOOP-4940. Remove a deprecated method FileSystem.delete(Path f).  (Enis
+    Soztutar via szetszwo)
+
+    HADOOP-4010. Change semantics for LineRecordReader to read an additional
+    line per split- rather than moving back one character in the stream- to
+    work with splittable compression codecs. (Abdul Qadeer via cdouglas)
+
+    HADOOP-5094. Show hostname and separate live/dead datanodes in DFSAdmin
+    report.  (Jakob Homan via szetszwo)
+
+    HADOOP-4942. Remove deprecated FileSystem methods getName() and
+    getNamed(String name, Configuration conf).  (Jakob Homan via szetszwo)
+
+    HADOOP-5486. Removes the CLASSPATH string from the command line and instead
+    exports it in the environment. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2827. Remove deprecated NetUtils::getServerAddress. (cdouglas)
+
+    HADOOP-5681. Change examples RandomWriter and RandomTextWriter to 
+    use new mapreduce API. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5680. Change org.apache.hadoop.examples.SleepJob to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5699. Change org.apache.hadoop.examples.PiEstimator to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5720. Introduces new task types - JOB_SETUP, JOB_CLEANUP
+    and TASK_CLEANUP. Removes the isMap methods from TaskID/TaskAttemptID
+    classes. (ddas)
+
+    HADOOP-5668. Change TotalOrderPartitioner to use new API. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-5738. Split "waiting_tasks" JobTracker metric into waiting maps and
+    waiting reduces. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5679. Resolve findbugs warnings in core/streaming/pipes/examples. 
+    (Jothi Padmanabhan via sharad)
+
+    HADOOP-4359. Support for data access authorization checking on Datanodes.
+    (Kan Zhang via rangadi)
+
+    HADOOP-5690. Change org.apache.hadoop.examples.DBCountPageView to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5694. Change org.apache.hadoop.examples.dancing to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5696. Change org.apache.hadoop.examples.Sort to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5698. Change org.apache.hadoop.examples.MultiFileWordCount to 
+    use new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5913. Provide ability to an administrator to stop and start
+    job queues. (Rahul Kumar Singh and Hemanth Yamijala via yhemanth)
+
+    MAPREDUCE-711. Removed Distributed Cache from Common, to move it
+    under Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6201. Change FileSystem::listStatus contract to throw
+    FileNotFoundException if the directory does not exist, rather than letting
+    this be implementation-specific. (Jakob Homan via cdouglas)
+
+    HADOOP-6230. Moved process tree and memory calculator related classes
+    from Common to Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6203. FsShell rm/rmr error message indicates exceeding Trash quota
+    and suggests using -skipTrash, when moving to trash fails.
+    (Boris Shkolnik via suresh)
+
+    HADOOP-6303. Eclipse .classpath template has outdated jar files and is
+    missing some new ones.  (cos)
+
+    HADOOP-6396. Fix uninformative exception message when unable to parse
+    umask. (jghoman)
+
+    HADOOP-6299. Reimplement the UserGroupInformation to use the OS
+    specific and Kerberos JAAS login. (omalley)
+
+    HADOOP-6686. Remove redundant exception class name from the exception
+    message for the exceptions thrown at RPC client. (suresh)
+
+    HADOOP-6701. Fix incorrect exit codes returned from chmod, chown and chgrp
+    commands from FsShell. (Ravi Phulari via suresh)
+
+  NEW FEATURES
+
+    HADOOP-6332. Large-scale Automated Test Framework. (sharad, Sreekanth
+    Ramakrishnan, et al. via cos)
+
+    HADOOP-4268. Change fsck to use ClientProtocol methods so that the
+    corresponding permission requirement for running the ClientProtocol
+    methods will be enforced.  (szetszwo)
+
+    HADOOP-3953. Implement sticky bit for directories in HDFS. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4368. Implement df in FsShell to show the status of a FileSystem.
+    (Craig Macdonald via szetszwo)
+
+    HADOOP-3741. Add a web ui to the SecondaryNameNode for showing its status.
+    (szetszwo)
+
+    HADOOP-5018. Add pipelined writers to Chukwa. (Ari Rabkin via cdouglas)
+
+    HADOOP-5052. Add an example computing exact digits of pi using the
+    Bailey-Borwein-Plouffe algorithm. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4927. Adds a generic wrapper around outputformat to allow creation of
+    output on demand (Jothi Padmanabhan via ddas)
+
+    HADOOP-5144. Add a new DFSAdmin command for changing the setting of restore
+    failed storage replicas in namenode. (Boris Shkolnik via szetszwo)
+
+    HADOOP-5258. Add a new DFSAdmin command to print a tree of the rack and
+    datanode topology as seen by the namenode.  (Jakob Homan via szetszwo)
+    
+    HADOOP-4756. A command line tool to access JMX properties on NameNode
+    and DataNode. (Boris Shkolnik via rangadi)
+
+    HADOOP-4539. Introduce backup node and checkpoint node. (shv)
+
+    HADOOP-5363. Add support for proxying connections to multiple clusters with
+    different versions to hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5528. Add a configurable hash partitioner operating on ranges of
+    BinaryComparable keys. (Klaas Bosteels via shv)
+
+    HADOOP-5257. HDFS servers may start and stop external components through
+    a plugin interface. (Carlos Valiente via dhruba)
+
+    HADOOP-5450. Add application-specific data types to streaming's typed bytes
+    interface. (Klaas Bosteels via omalley)
+
+    HADOOP-5518. Add contrib/mrunit, a MapReduce unit test framework.
+    (Aaron Kimball via cutting)
+
+    HADOOP-5469.  Add /metrics servlet to daemons, providing metrics
+    over HTTP as either text or JSON.  (Philip Zeyliger via cutting)
+
+    HADOOP-5467. Introduce offline fsimage image viewer. (Jakob Homan via shv)
+
+    HADOOP-5752. Add a new hdfs image processor, Delimited, to oiv. (Jakob
+    Homan via szetszwo)
+
+    HADOOP-5266. Adds the capability to do mark/reset of the reduce values 
+    iterator in the Context object API. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5745. Allow setting the default value of maxRunningJobs for all
+    pools. (dhruba via matei)
+
+    HADOOP-5643. Adds a way to decommission TaskTrackers while the JobTracker
+    is running. (Amar Kamat via ddas)
+
+    HADOOP-4829. Allow FileSystem shutdown hook to be disabled.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5815. Sqoop: A database import tool for Hadoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4861. Add disk usage with human-readable size (-duh).
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5844. Use mysqldump when connecting to local mysql instance in Sqoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-5976. Add a new command, classpath, to the hadoop script.  (Owen
+    O'Malley and Gary Murry via szetszwo)
+
+    HADOOP-6120. Add support for Avro specific and reflect data.
+    (sharad via cutting)
+
+    HADOOP-6226. Moves BoundedByteArrayOutputStream from the tfile package to
+    the io package and makes it available to other users (MAPREDUCE-318). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-6105. Adds support for automatically handling deprecation of
+    configuration keys. (V.V.Chaitanya Krishna via yhemanth)
+    
+    HADOOP-6235. Adds new method to FileSystem for clients to get server
+    defaults. (Kan Zhang via suresh)
+
+    HADOOP-6234. Add new option dfs.umaskmode to set umask in configuration
+    to use octal or symbolic instead of decimal. (Jakob Homan via suresh)
+
+    HADOOP-5073. Add annotation mechanism for interface classification.
+    (Jakob Homan via suresh)
+
+    HADOOP-4012. Provide splitting support for bzip2 compressed files. (Abdul
+    Qadeer via cdouglas)
+
+    HADOOP-6246. Add backward compatibility support to use deprecated decimal 
+    umask from old configuration. (Jakob Homan via suresh)
+
+    HADOOP-4952. Add new improved file system interface FileContext for the
+    application writer (Sanjay Radia via suresh)
+
+    HADOOP-6170. Add facility to tunnel Avro RPCs through Hadoop RPCs.
+    This permits one to take advantage of both Avro's RPC versioning
+    features and Hadoop's proven RPC scalability.  (cutting)
+
+    HADOOP-6267. Permit building contrib modules located in external
+    source trees.  (Todd Lipcon via cutting)
+
+    HADOOP-6240. Add new FileContext rename operation that is posix compliant
+    and allows overwriting existing destination. (suresh)
+
+    HADOOP-6204. Implementing aspects development and fault injection
+    framework for Hadoop (cos)
+
+    HADOOP-6313. Implement Syncable interface in FSDataOutputStream to expose
+    flush APIs to application users. (Hairong Kuang via suresh)
+
+    HADOOP-6284. Add a new parameter, HADOOP_JAVA_PLATFORM_OPTS, to
+    hadoop-config.sh so that it allows setting java command options for
+    JAVA_PLATFORM.  (Koji Noguchi via szetszwo)
+
+    HADOOP-6337. Updates FilterInitializer class to be more visible,
+    and the init of the class is made to take a Configuration argument.
+    (Jakob Homan via ddas)
+
+    HADOOP-6223. Add new file system interface AbstractFileSystem with
+    implementation of some file systems that delegate to old FileSystem.
+    (Sanjay Radia via suresh)
+
+    HADOOP-6433. Introduce asynchronous deletion of files via a pool of
+    threads. This can be used to delete files in the Distributed
+    Cache. (Zheng Shao via dhruba)
+
+    HADOOP-6415. Adds a common token interface for both job token and 
+    delegation token. (Kan Zhang via ddas)
+
+    HADOOP-6408. Add a /conf servlet to dump running configuration.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6520. Adds APIs to read/write Token and secret keys. Also
+    adds the automatic loading of tokens into UserGroupInformation
+    upon login. The tokens are read from a file specified in the
+    environment variable. (ddas)
+
+    HADOOP-6419. Adds SASL based authentication to RPC.
+    (Kan Zhang via ddas)
+
+    HADOOP-6510. Adds a way for superusers to impersonate other users
+    in a secure environment. (Jitendra Nath Pandey via ddas)
+
+    HADOOP-6421. Adds Symbolic links to FileContext, AbstractFileSystem.
+    It also adds a limited implementation for the local file system
+     (RawLocalFs) that allows local symlinks. (Eli Collins via Sanjay Radia)
+
+    HADOOP-6577. Add hidden configuration option "ipc.server.max.response.size"
+    to change the default 1 MB, the maximum size when large IPC handler 
+    response buffer is reset. (suresh)
+
+    HADOOP-6568. Adds authorization for the default servlets. 
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-6586. Log authentication and authorization failures and successes
+    for RPC (boryas)
+
+    HADOOP-6580. UGI should contain authentication method. (jnp via boryas)
+    
+    HADOOP-6657. Add a capitalization method to StringUtils for MAPREDUCE-1545.
+    (Luke Lu via Steve Loughran)
+
+    HADOOP-6692. Add FileContext#listStatus that returns an iterator.
+    (hairong)
+
+    HADOOP-6869. Functionality to create file or folder on a remote daemon
+    side (Vinay Thota via cos)
+
+  IMPROVEMENTS
+
+    HADOOP-6798. Align Ivy version for all Hadoop subprojects. (cos)
+
+    HADOOP-6777. Implement functionality to suspend and resume a process.
+    (Vinay Thota via cos)
+
+    HADOOP-6772. Utilities specific to system tests. (Vinay Thota via cos)
+
+    HADOOP-6771. Herriot's artifact id for Maven deployment should be set to
+    hadoop-core-instrumented (cos)
+
+    HADOOP-6752. Remote cluster control functionality needs JavaDocs
+    improvement (Balaji Rajagopalan via cos).
+
+    HADOOP-4565. Added CombineFileInputFormat to use data locality information
+    to create splits. (dhruba via zshao)
+
+    HADOOP-4936. Improvements to TestSafeMode. (shv)
+
+    HADOOP-4985. Remove unnecessary "throw IOException" declarations in
+    FSDirectory related methods.  (szetszwo)
+
+    HADOOP-5017. Change NameNode.namesystem declaration to private.  (szetszwo)
+
+    HADOOP-4794. Add branch information from the source version control into
+    the version information that is compiled into Hadoop. (cdouglas via 
+    omalley)
+
+    HADOOP-5070. Increment copyright year to 2009, remove assertions of ASF
+    copyright to licensed files. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5037. Deprecate static FSNamesystem.getFSNamesystem().  (szetszwo)
+
+    HADOOP-5088. Include releaseaudit target as part of developer test-patch
+    target.  (Giridharan Kesavan via nigel)
+
+    HADOOP-2721. Uses setsid when creating new tasks so that subprocesses of 
+    this process will be within this new session (and this process will be 
+    the process leader for all the subprocesses). Killing the process leader,
+    or the main Java task in Hadoop's case, kills the entire subtree of
+    processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5097. Remove static variable JspHelper.fsn, a static reference to
+    a non-singleton FSNamesystem object.  (szetszwo)
+
+    HADOOP-3327. Improves handling of READ_TIMEOUT during map output copying.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5124. Choose datanodes randomly instead of starting from the first
+    datanode for providing fairness.  (hairong via szetszwo)
+
+    HADOOP-4930. Implement a Linux native executable that can be used to 
+    launch tasks as users. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5122. Fix format of fs.default.name value in libhdfs test conf.
+    (Craig Macdonald via tomwhite)
+
+    HADOOP-5038. Direct daemon trace to debug log instead of stdout. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5101. Improve packaging by adding 'all-jars' target building core,
+    tools, and example jars. Let findbugs depend on this rather than the 'tar'
+    target. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-4868. Splits the hadoop script into three parts - bin/hadoop, 
+    bin/mapred and bin/hdfs. (Sharad Agarwal via ddas)
+
+    HADOOP-1722. Adds support for TypedBytes and RawBytes in Streaming.
+    (Klaas Bosteels via ddas)
+
+    HADOOP-4220. Changes the JobTracker restart tests so that they take much
+    less time. (Amar Kamat via ddas)
+
+    HADOOP-4885. Try to restore failed name-node storage directories at 
+    checkpoint time. (Boris Shkolnik via shv)
+
+    HADOOP-5209. Update year to 2009 for javadoc.  (szetszwo)
+
+    HADOOP-5279. Remove unnecessary targets from test-patch.sh.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5120. Remove the use of FSNamesystem.getFSNamesystem() from 
+    UpgradeManagerNamenode and UpgradeObjectNamenode.  (szetszwo)
+
+    HADOOP-5222. Add offset to datanode clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5240. Skip re-building javadoc when it is already
+    up-to-date. (Aaron Kimball via cutting)
+
+    HADOOP-5042. Add a cleanup stage to log rollover in Chukwa appender.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-5264. Removes redundant configuration object from the TaskTracker.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5232. Enable patch testing to occur on more than one host.
+    (Giri Kesavan via nigel)
+
+    HADOOP-4546. Fix DF reporting for AIX. (Bill Habermaas via cdouglas)
+
+    HADOOP-5023. Add Tomcat support to HdfsProxy. (Zhiyong Zhang via cdouglas)
+    
+    HADOOP-5317. Provide documentation for LazyOutput Feature. 
+    (Jothi Padmanabhan via johan)
+
+    HADOOP-5455. Document rpc metrics context to the extent dfs, mapred, and
+    jvm contexts are documented. (Philip Zeyliger via cdouglas)
+
+    HADOOP-5358. Provide scripting functionality to the synthetic load
+    generator. (Jakob Homan via hairong)
+
+    HADOOP-5442. Paginate jobhistory display and added some search
+    capabilities. (Amar Kamat via acmurthy) 
+
+    HADOOP-4842. Streaming now allows specifying a command for the combiner.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5196. avoiding unnecessary byte[] allocation in 
+    SequenceFile.CompressedBytes and SequenceFile.UncompressedBytes.
+    (hong tang via mahadev)
+
+    HADOOP-4655. New method FileSystem.newInstance() that always returns
+    a newly allocated FileSystem object. (dhruba)
+
+    HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each
+    heartbeat by default. (matei)
+
+    HADOOP-5491.  In contrib/index, better control memory usage.
+    (Ning Li via cutting)
+
+    HADOOP-5423. Include option of preserving file metadata in
+    SequenceFile::sort. (Michael Tamm via cdouglas)
+
+    HADOOP-5331. Add support for KFS appends. (Sriram Rao via cdouglas)
+
+    HADOOP-4365. Make Configuration::getProps protected in support of
+    meaningful subclassing. (Steve Loughran via cdouglas)
+
+    HADOOP-2413. Remove the static variable FSNamesystem.fsNamesystemObject.
+    (Konstantin Shvachko via szetszwo)
+
+    HADOOP-4584. Improve datanode block reports and associated file system
+    scan to avoid interfering with normal datanode operations.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5502. Documentation for backup and checkpoint nodes.
+    (Jakob Homan via shv)
+
+    HADOOP-5485. Mask actions in the fair scheduler's servlet UI based on
+    value of webinterface.private.actions. 
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5581. HDFS should throw FileNotFoundException while opening
+    a file that does not exist. (Brian Bockelman via rangadi)
+
+    HADOOP-5509. PendingReplicationBlocks does not start monitor in the
+    constructor. (shv)
+
+    HADOOP-5494. Modify sorted map output merger to lazily read values,
+    rather than buffering at least one record for each segment. (Devaraj Das
+    via cdouglas)
+
+    HADOOP-5396. Provide ability to refresh queue ACLs in the JobTracker
+    without having to restart the daemon.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4490. Provide ability to run tasks as job owners.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5625. Add operation duration to clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5705. Improve TotalOrderPartitioner efficiency by updating the trie
+    construction. (Dick King via cdouglas)
+
+    HADOOP-5589. Eliminate source limit of 64 for map-side joins imposed by
+    TupleWritable encoding. (Jingkei Ly via cdouglas)
+
+    HADOOP-5734. Correct block placement policy description in HDFS
+    Design document. (Konstantin Boudnik via shv)
+
+    HADOOP-5657. Validate data in TestReduceFetch to improve merge test
+    coverage. (cdouglas)
+
+    HADOOP-5613. Change S3Exception to checked exception.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5717. Create public enum class for the Framework counters in 
+    org.apache.hadoop.mapreduce. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5217. Split AllTestDriver for core, hdfs and mapred. (sharad)
+
+    HADOOP-5364. Add certificate expiration warning to HsftpFileSystem and HDFS
+    proxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5733. Add map/reduce slot capacity and blacklisted capacity to
+    JobTracker metrics. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5596. Add EnumSetWritable. (He Yongqiang via szetszwo)
+
+    HADOOP-5727. Simplify hashcode for ID types. (Shevek via cdouglas)
+
+    HADOOP-5500. In DBOutputFormat, where field names are absent permit the
+    number of fields to be sufficient to construct the select query. (Enis
+    Soztutar via cdouglas)
+
+    HADOOP-5081. Split TestCLI into HDFS, Mapred and Core tests. (sharad)
+
+    HADOOP-5015. Separate block management code from FSNamesystem.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5080. Add new test cases to TestMRCLI and TestHDFSCLI
+    (V.Karthikeyan via nigel)
+
+    HADOOP-5135. Splits the tests into different directories based on the 
+    package. Four new test targets have been defined - run-test-core, 
+    run-test-mapred, run-test-hdfs and run-test-hdfs-with-mr.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5771. Implements unit tests for LinuxTaskController.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5419. Provide a facility to query the Queue ACLs for the
+    current user.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5780. Improve per block message printed by "-metaSave" in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5823. Added a new class DeprecatedUTF8 to help with removing
+    UTF8 related javac warnings. These warnings are removed in 
+    FSEditLog.java as a use case. (Raghu Angadi)
+
+    HADOOP-5824. Deprecate DataTransferProtocol.OP_READ_METADATA and remove
+    the corresponding unused codes.  (Kan Zhang via szetszwo)
+
+    HADOOP-5721. Factor out EditLogFileInputStream and EditLogFileOutputStream
+    into independent classes. (Luca Telloli & Flavio Junqueira via shv)
+
+    HADOOP-5838. Fix a few javac warnings in HDFS. (Raghu Angadi)
+
+    HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer 
+    via sharad)
+
+    HADOOP-5858. Eliminate UTF8 and fix warnings in test/hdfs-with-mr package.
+    (shv)
+
+    HADOOP-5866. Move DeprecatedUTF8 from o.a.h.io to o.a.h.hdfs since it may
+    not be used outside hdfs. (Raghu Angadi)
+
+    HADOOP-5857. Move normal java methods from hdfs .jsp files to .java files.
+    (szetszwo)
+
+    HADOOP-5873. Remove deprecated methods randomDataNode() and
+    getDatanodeByIndex(..) in FSNamesystem.  (szetszwo)
+
+    HADOOP-5572. Improves the progress reporting for the sort phase for both
+    maps and reduces. (Ravi Gummadi via ddas)
+
+    HADOOP-5839. Fix EC2 scripts to allow remote job submission.
+    (Joydeep Sen Sarma via tomwhite)
+
+    HADOOP-5877. Fix javac warnings in TestHDFSServerPorts, TestCheckpoint, 
+    TestNameEditsConfig, TestStartup and TestStorageRestore.
+    (Jakob Homan via shv)
+
+    HADOOP-5438. Provide a single FileSystem method to create or 
+    open-for-append to a file.  (He Yongqiang via dhruba)
+
+    HADOOP-5472. Change DistCp to support globbing of input paths.  (Dhruba
+    Borthakur and Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5175. Don't unpack libjars on classpath. (Todd Lipcon via tomwhite)
+
+    HADOOP-5620. Add an option to DistCp for preserving modification and access
+    times.  (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5664. Change map serialization so a lock is obtained only where
+    contention is possible, rather than for each write. (cdouglas)
+
+    HADOOP-5896. Remove the dependency of GenericOptionsParser on 
+    Option.withArgPattern. (Giridharan Kesavan and Sharad Agarwal via 
+    sharad)
+
+    HADOOP-5784. Makes the number of heartbeats that should arrive a second
+    at the JobTracker configurable. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5955. Changes TestFileOutputFormat so that it uses LOCAL_MR
+    instead of CLUSTER_MR. (Jothi Padmanabhan via das)
+
+    HADOOP-5948. Changes TestJavaSerialization to use LocalJobRunner 
+    instead of MiniMR/DFS cluster. (Jothi Padmanabhan via das)
+
+    HADOOP-2838. Add mapred.child.env to pass environment variables to 
+    tasktracker's child processes. (Amar Kamat via sharad)
+
+    HADOOP-5961. DataNode process understands generic hadoop command line
+    options (like -Ddfs.property=value). (Raghu Angadi)
+
+    HADOOP-5938. Change org.apache.hadoop.mapred.jobcontrol to use new
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-2141. Improves the speculative execution heuristic. The heuristic
+    is currently based on the progress-rates of tasks and the expected time
+    to complete. Also, statistics about trackers are collected, and speculative
+    tasks are not given to the ones deduced to be slow. 
+    (Andy Konwinski and ddas)
+
+    HADOOP-5952. Change "-1 tests included" wording in test-patch.sh.
+    (Gary Murry via szetszwo)
+
+    HADOOP-6106. Provides an option in ShellCommandExecutor to timeout 
+    commands that do not complete within a certain amount of time.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5925. EC2 scripts should exit on error. (tomwhite)
+
+    HADOOP-6109. Change Text to grow its internal buffer exponentially, rather
+    than the max of the current length and the proposed length to improve
+    performance reading large values. (thushara wijeratna via cdouglas)
+
+    HADOOP-2366. Support trimmed strings in Configuration.  (Michele Catasta
+    via szetszwo)
+
+    HADOOP-6099. The RPC module can be configured to not send periodic pings.
+    The default behaviour of sending periodic pings remains unchanged. (dhruba)
+
+    HADOOP-6142. Update documentation and use of harchives for relative paths
+    added in MAPREDUCE-739. (Mahadev Konar via cdouglas)
+
+    HADOOP-6148. Implement a fast, pure Java CRC32 calculator which outperforms
+    java.util.zip.CRC32.  (Todd Lipcon and Scott Carey via szetszwo)
+
+    HADOOP-6146. Upgrade to JetS3t version 0.7.1. (tomwhite)
+
+    HADOOP-6161. Add get/setEnum methods to Configuration. (cdouglas)
+
+    HADOOP-6160. Fix releaseaudit target to run on specific directories.
+    (gkesavan)
+    
+    HADOOP-6169. Removing deprecated method calls in TFile. (hong tang via 
+    mahadev)
+
+    HADOOP-6176. Add a couple package private methods to AccessTokenHandler
+    for testing.  (Kan Zhang via szetszwo)
+
+    HADOOP-6182. Fix ReleaseAudit warnings (Giridharan Kesavan and Lee Tucker
+    via gkesavan)
+
+    HADOOP-6173. Change src/native/packageNativeHadoop.sh to package all
+    native library files.  (Hong Tang via szetszwo)
+
+    HADOOP-6184. Provide an API to dump Configuration in a JSON format.
+    (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6224. Add a method to WritableUtils performing a bounded read of an
+    encoded String. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-6133. Add a caching layer to Configuration::getClassByName to
+    alleviate a performance regression introduced in a compatibility layer.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6252. Provide a method to determine if a deprecated key is set in
+    config file. (Jakob Homan via suresh)
+
+    HADOOP-5879. Read compression level and strategy from Configuration for
+    gzip compression. (He Yongqiang via cdouglas)
+
+    HADOOP-6216. Support comments in host files.  (Ravi Phulari and Dmytro
+    Molkov via szetszwo)
+
+    HADOOP-6217. Update documentation for project split. (Corinne Chandel via 
+    omalley)
+
+    HADOOP-6268. Add ivy jar to .gitignore. (Todd Lipcon via cdouglas)
+
+    HADOOP-6270. Support deleteOnExit in FileContext.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-6233. Rename configuration keys towards API standardization and
+    backward compatibility. (Jithendra Pandey via suresh)
+
+    HADOOP-6260. Add additional unit tests for FileContext util methods.
+    (Gary Murry via suresh).
+
+    HADOOP-6309. Change build.xml to run tests with java asserts.  (Eli
+    Collins via szetszwo)
+
+    HADOOP-6326. Hudson runs should check for AspectJ warnings and report
+    failure if any is present (cos)
+
+    HADOOP-6329. Add build-fi directory to the ignore lists.  (szetszwo)
+
+    HADOOP-5107. Use Maven ant tasks to publish the subproject jars.
+    (Giridharan Kesavan via omalley)
+
+    HADOOP-6343. Log unexpected throwable object caught in RPC.  (Jitendra Nath
+    Pandey via szetszwo)
+
+    HADOOP-6367. Removes Access Token implementation from common.
+    (Kan Zhang via ddas)
+
+    HADOOP-6395. Upgrade some libraries to be consistent across common, hdfs,
+    and mapreduce. (omalley)
+
+    HADOOP-6398. Build is broken after HADOOP-6395 patch has been applied (cos)
+
+    HADOOP-6413. Move TestReflectionUtils to Common. (Todd Lipcon via tomwhite)
+
+    HADOOP-6283. Improve the exception messages thrown by
+    FileUtil$HardLink.getLinkCount(..).  (szetszwo)
+
+    HADOOP-6279. Add Runtime::maxMemory to JVM metrics. (Todd Lipcon via
+    cdouglas)
+
+    HADOOP-6305. Unify build property names to facilitate cross-projects
+    modifications (cos)
+
+    HADOOP-6312. Remove unnecessary debug logging in Configuration constructor.
+    (Aaron Kimball via cdouglas)
+
+    HADOOP-6366. Reduce ivy console output to observable level (cos)
+
+    HADOOP-6400. Log errors getting Unix UGI. (Todd Lipcon via tomwhite)
+
+    HADOOP-6346. Add support for specifying unpack pattern regex to
+    RunJar.unJar. (Todd Lipcon via tomwhite)
+
+    HADOOP-6422. Make RPC backend pluggable, protocol-by-protocol, to
+    ease evolution towards Avro.  (cutting)
+
+    HADOOP-5958. Use JDK 1.6 File APIs in DF.java wherever possible.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6222. Core doesn't have TestCommonCLI facility. (cos)
+
+    HADOOP-6394. Add a helper class to simplify FileContext related tests and
+    improve code reusability. (Jitendra Nath Pandey via suresh)
+
+    HADOOP-4656. Add a user to groups mapping service. (boryas, acmurthy)
+
+    HADOOP-6435. Make RPC.waitForProxy with timeout public. (Steve Loughran
+    via tomwhite)
+  
+    HADOOP-6472. Add tokenCache option to GenericOptionsParser for passing a
+    file with secret keys to a map reduce job. (boryas)
+
+    HADOOP-3205. Read multiple chunks directly from FSInputChecker subclass
+    into user buffers. (Todd Lipcon via tomwhite)
+
+    HADOOP-6479. TestUTF8 assertions could fail with better text.
+    (Steve Loughran via tomwhite)
+
+    HADOOP-6155. Deprecate RecordIO anticipating Avro. (Tom White via cdouglas)
+
+    HADOOP-6492. Make some Avro serialization APIs public.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6497. Add an adapter for Avro's SeekableInput interface, so
+    that Avro can read FileSystem data.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6495.  Identifier should be serialized after the password is
+    created in the Token constructor. (jnp via boryas)
+
+    HADOOP-6518. Makes the UGI honor the env var KRB5CCNAME. 
+    (Owen O'Malley via ddas)
+
+    HADOOP-6531. Enhance FileUtil with an API to delete all contents of a
+    directory. (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6547. Move DelegationToken into Common, so that it can be used by
+    MapReduce also. (devaraj via omalley)
+
+    HADOOP-6552. Puts renewTGT=true and useTicketCache=true for the keytab
+    kerberos options. (ddas)
+
+    HADOOP-6534. Trim whitespace from directory lists initializing
+    LocalDirAllocator. (Todd Lipcon via cdouglas)
+
+    HADOOP-6559. Makes the RPC client automatically re-login when the SASL 
+    connection setup fails. This is applicable only to keytab based logins.
+    (Devaraj Das)
+
+    HADOOP-6551. Delegation token renewing and cancelling should provide
+    meaningful exceptions when there are failures instead of returning 
+    false. (omalley)
+
+    HADOOP-6583. Captures authentication and authorization metrics. (ddas)
+
+    HADOOP-6543. Allows secure clients to talk to unsecure clusters. 
+    (Kan Zhang via ddas)
+
+    HADOOP-6579. Provide a mechanism for encoding/decoding Tokens from
+    a url-safe string and change the commons-codec library to 1.4. (omalley)
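+
+    The general idea, sketched with the commons-codec 1.4 URL-safe Base64
+    helpers (an illustration, not the actual Token encode/decode code):
+
+      import org.apache.commons.codec.binary.Base64;
+
+      /** Illustrative only: round-trip opaque bytes through a URL-safe string. */
+      public class UrlSafeTokenSketch {
+        static String encodeToUrlString(byte[] tokenBytes) {
+          // URL-safe alphabet ('-' and '_' instead of '+' and '/'), no padding.
+          return Base64.encodeBase64URLSafeString(tokenBytes);
+        }
+
+        static byte[] decodeFromUrlString(String s) {
+          return Base64.decodeBase64(s);
+        }
+
+        public static void main(String[] args) {
+          byte[] identifier = {0x01, (byte) 0xfe, 0x7f, 0x00, 0x3f};
+          String urlSafe = encodeToUrlString(identifier);
+          System.out.println(urlSafe);                             // safe in a URL
+          System.out.println(decodeFromUrlString(urlSafe).length); // 5
+        }
+      }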
+
+    HADOOP-6596. Add a version field to the AbstractDelegationTokenIdentifier's
+    serialized value. (omalley)
+
+    HADOOP-6573. Support for persistent delegation tokens.
+    (Jitendra Pandey via shv)
+
+    HADOOP-6594. Provide a fetchdt tool via bin/hdfs. (jhoman via acmurthy) 
+
+    HADOOP-6589. Provide better error messages when RPC authentication fails.
+    (Kan Zhang via omalley)
+
+    HADOOP-6599. Split existing RpcMetrics into RpcMetrics & RpcDetailedMetrics.
+    (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6537. Declare more detailed exceptions in FileContext and
+    AbstractFileSystem (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6486. fix common classes to work with Avro 1.3 reflection.
+    (cutting via tomwhite)
+
+    HADOOP-6591. HarFileSystem can handle paths with whitespace characters.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6407. Have a way to automatically update Eclipse .classpath file
+    when new libs are added to the classpath through Ivy. (tomwhite)
+
+    HADOOP-3659. Patch to allow hadoop native to compile on Mac OS X.
+    (Colin Evans and Allen Wittenauer via tomwhite)
+
+    HADOOP-6471. StringBuffer -> StringBuilder - conversion of references
+    as necessary. (Kay Kay via tomwhite)
+
+    HADOOP-6646. Move HarfileSystem out of Hadoop Common. (mahadev)
+
+    HADOOP-6566. Add methods supporting, enforcing narrower permissions on
+    local daemon directories. (Arun Murthy and Luke Lu via cdouglas)
+
+    HADOOP-6705. Fix to work with 1.5 version of jiracli
+    (Giridharan Kesavan)
+
+    HADOOP-6658. Exclude Private elements from generated Javadoc. (tomwhite)
+
+    HADOOP-6635. Install/deploy source jars to Maven repo. 
+    (Patrick Angeles via jghoman)
+
+    HADOOP-6717. Log levels in o.a.h.security.Groups too high 
+    (Todd Lipcon via jghoman)
+
+    HADOOP-6667. RPC.waitForProxy should retry through NoRouteToHostException.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6677. InterfaceAudience.LimitedPrivate should take a string not an
+    enum. (tomwhite)
+
+    HADOOP-6678. Remove FileContext#isFile, isDirectory, and exists.
+    (Eli Collins via hairong)
+
+    HADOOP-6515. Make maximum number of http threads configurable.
+    (Scott Chen via zshao)
+
+    HADOOP-6563. Add more symlink tests to cover intermediate symlinks
+    in paths. (Eli Collins via suresh)
+
+    HADOOP-6585.  Add FileStatus#isDirectory and isFile.  (Eli Collins via
+    tomwhite)
+
+    HADOOP-6738.  Move cluster_setup.xml from MapReduce to Common.
+    (Tom White via tomwhite)
+
+    HADOOP-6794. Move configuration and script files post split. (tomwhite)
+
+    HADOOP-6403.  Deprecate EC2 bash scripts.  (tomwhite)
+
+    HADOOP-6769. Add an API in FileSystem to get FileSystem instances based 
+    on users. (ddas via boryas)
+
+    HADOOP-6813. Add a new newInstance method in FileSystem that takes 
+    a "user" as argument (ddas via boryas)
+
+    HADOOP-6668.  Apply audience and stability annotations to classes in
+    common.  (tomwhite)
+
+    HADOOP-6821.  Document changes to memory monitoring.  (Hemanth Yamijala
+    via tomwhite)
+
+  OPTIMIZATIONS
+
+    HADOOP-5595. NameNode does not need to run a replicator to choose a
+    random DataNode. (hairong)
+
+    HADOOP-5603. Improve NameNode's block placement performance. (hairong)
+
+    HADOOP-5638. More improvement on block placement performance. (hairong)
+
+    HADOOP-6180. NameNode slowed down when many files with same filename
+    were moved to Trash. (Boris Shkolnik via hairong)
+
+    HADOOP-6166. Further improve the performance of the pure-Java CRC32
+    implementation. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-6271. Add recursive and non recursive create and mkdir to 
+    FileContext. (Sanjay Radia via suresh)
+
+    HADOOP-6261. Add URI based tests for FileContext. 
+    (Ravi Phulari via suresh).
+
+    HADOOP-6307. Add a new SequenceFile.Reader constructor in order to support
+    reading on un-closed file.  (szetszwo)
+
+    HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).
+    (mahadev via szetszwo)
+
+    HADOOP-6569. FsShell#cat should avoid calling unnecessary getFileStatus
+    before opening a file to read. (hairong)
+
+    HADOOP-6689. Add directory renaming test to existing FileContext tests.
+    (Eli Collins via suresh)
+
+    HADOOP-6713. The RPC server Listener thread is a scalability bottleneck.
+    (Dmytro Molkov via hairong)
+
+  BUG FIXES
+
+    HADOOP-6748. Removes hadoop.cluster.administrators, cluster administrators
+    acl is passed as parameter in constructor. (amareshwari) 
+
+    HADOOP-6828. Herriot uses old way of accessing logs directories (Sreekanth
+    Ramakrishnan via cos)
+
+    HADOOP-6788. [Herriot] Exception exclusion functionality is not working
+    correctly. (Vinay Thota via cos)
+
+    HADOOP-6773. Ivy folder contains redundant files (cos)
+
+    HADOOP-5379. CBZip2InputStream to throw IOException on data crc error.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-5326. Fixes CBZip2OutputStream data corruption problem.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-4963. Fixes a logging to do with getting the location of
+    map output file. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2337. Trash should close FileSystem on exit and should not start 
+    emptying thread if disabled. (shv)
+
+    HADOOP-5072. Fix failure in TestCodec because testSequenceFileGzipCodec 
+    won't pass without native gzip codec. (Zheng Shao via dhruba)
+
+    HADOOP-5050. TestDFSShell.testFilePermissions should not assume umask
+    setting.  (Jakob Homan via szetszwo)
+
+    HADOOP-4975. Set classloader for nested mapred.join configs. (Jingkei Ly
+    via cdouglas)
+
+    HADOOP-5078. Remove invalid AMI kernel in EC2 scripts. (tomwhite)
+
+    HADOOP-5045. FileSystem.isDirectory() should not be deprecated.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4960. Use datasource time, rather than system time, during metrics
+    demux. (Eric Yang via cdouglas)
+
+    HADOOP-5032. Export conf dir set in config script. (Eric Yang via cdouglas)
+
+    HADOOP-5176. Fix a typo in TestDFSIO.  (Ravi Phulari via szetszwo)
+
+    HADOOP-4859. Distinguish daily rolling output dir by adding a timestamp.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-4959. Correct system metric collection from top on Redhat 5.1. (Eric
+    Yang via cdouglas)
+
+    HADOOP-5039. Fix log rolling regex to process only the relevant
+    subdirectories. (Jerome Boulon via cdouglas)
+
+    HADOOP-5095. Update Chukwa watchdog to accept config parameter. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5147. Correct reference to agent list in Chukwa bin scripts. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5148. Fix logic disabling watchdog timer in Chukwa daemon scripts.
+    (Ari Rabkin via cdouglas)
+
+    HADOOP-5100. Append, rather than truncate, when creating log4j metrics in
+    Chukwa. (Jerome Boulon via cdouglas)
+
+    HADOOP-5204. Fix broken trunk compilation on Hudson by letting 
+    task-controller be an independent target in build.xml.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5212. Fix the path translation problem introduced by HADOOP-4868 
+    running on cygwin. (Sharad Agarwal via omalley)
+
+    HADOOP-5226. Add license headers to html and jsp files.  (szetszwo)
+
+    HADOOP-5172. Disable misbehaving Chukwa unit test until it can be fixed.
+    (Jerome Boulon via nigel)
+
+    HADOOP-4933. Fixes a ConcurrentModificationException problem that shows up
+    when the history viewer is accessed concurrently. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5253. Remove duplicate call to cn-docs target. 
+    (Giri Kesavan via nigel)
+
+    HADOOP-5251. Fix classpath for contrib unit tests to include clover jar.
+    (nigel)
+
+    HADOOP-5206. Synchronize "unprotected*" methods of FSDirectory on the root.
+    (Jakob Homan via shv)
+
+    HADOOP-5292. Fix NPE in KFS::getBlockLocations. (Sriram Rao via lohit)
+
+    HADOOP-5219. Adds a new property io.seqfile.local.dir for use by
+    SequenceFile, which earlier used mapred.local.dir. (Sharad Agarwal
+    via ddas)
+
+    HADOOP-5300. Fix ant javadoc-dev target and the typo in the class name
+    NameNodeActivtyMBean.  (szetszwo)
+
+    HADOOP-5218.  libhdfs unit test failed because it was unable to 
+    start namenode/datanode. Fixed. (dhruba)
+
+    HADOOP-5273. Add license header to TestJobInProgress.java.  (Jakob Homan
+    via szetszwo)
+    
+    HADOOP-5229. Remove duplicate version variables in build files
+    (Stefan Groschupf via johan)
+
+    HADOOP-5383. Avoid building an unused string in NameNode's 
+    verifyReplication(). (Raghu Angadi)
+
+    HADOOP-5347. Create a job output directory for the bbp examples. (szetszwo)
+
+    HADOOP-5341. Make hadoop-daemon scripts backwards compatible with the
+    changes in HADOOP-4868. (Sharad Agarwal via yhemanth)
+
+    HADOOP-5456. Fix javadoc links to ClientProtocol#restoreFailedStorage(..).
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-5458. Remove leftover Chukwa entries from build, etc. (cdouglas)
+
+    HADOOP-5386. Modify hdfsproxy unit test to start on a random port,
+    implement clover instrumentation. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5511. Add Apache License to EditLogBackupOutputStream. (shv)
+
+    HADOOP-5507. Fix JMXGet javadoc warnings.  (Boris Shkolnik via szetszwo)
+
+    HADOOP-5191. Accessing HDFS with any ip or hostname should work as long 
+    as it points to the interface NameNode is listening on. (Raghu Angadi)
+
+    HADOOP-5561. Add javadoc.maxmemory parameter to build, preventing OOM
+    exceptions from javadoc-dev. (Jakob Homan via cdouglas)
+
+    HADOOP-5149. Modify HistoryViewer to ignore unfamiliar files in the log
+    directory. (Hong Tang via cdouglas)
+
+    HADOOP-5477. Fix rare failure in TestCLI for hosts returning variations of
+    'localhost'. (Jakob Homan via cdouglas)
+
+    HADOOP-5194. Disables setsid for tasks run on cygwin. 
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5322. Fix misleading/outdated comments in JobInProgress.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-5198. Fixes a problem to do with the task PID file being absent and 
+    the JvmManager trying to look for it. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5464. DFSClient did not treat write timeout of 0 properly.
+    (Raghu Angadi)
+
+    HADOOP-4045. Fix processing of IO errors in EditsLog.
+    (Boris Shkolnik via shv)
+
+    HADOOP-5462. Fixed a double free bug in the task-controller
+    executable. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5652. Fix a bug where in-memory segments are incorrectly retained in
+    memory. (cdouglas)
+
+    HADOOP-5533. Recovery duration shown on the jobtracker webpage is 
+    inaccurate. (Amar Kamat via sharad)
+
+    HADOOP-5647. Fix TestJobHistory to not depend on /tmp. (Ravi Gummadi 
+    via sharad)
+
+    HADOOP-5661. Fixes some findbugs warnings in o.a.h.mapred* packages and
+    suppresses a bunch of them. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5704. Fix compilation problems in TestFairScheduler and
+    TestCapacityScheduler.  (Chris Douglas via szetszwo)
+
+    HADOOP-5650. Fix safemode messages in the Namenode log.  (Suresh Srinivas
+    via szetszwo)
+
+    HADOOP-5488. Removes the pidfile management for the Task JVM from the
+    framework and instead passes the PID back and forth between the
+    TaskTracker and the Task processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5658. Fix Eclipse templates. (Philip Zeyliger via shv)
+
+    HADOOP-5709. Remove redundant synchronization added in HADOOP-5661. (Jothi
+    Padmanabhan via cdouglas)
+
+    HADOOP-5715. Add conf/mapred-queue-acls.xml to the ignore lists.
+    (szetszwo)
+
+    HADOOP-5592. Fix typo in Streaming doc in reference to GzipCodec.
+    (Corinne Chandel via tomwhite)
+
+    HADOOP-5656. Counter for S3N Read Bytes does not work. (Ian Nowland
+    via tomwhite)
+
+    HADOOP-5406. Fix JNI binding for ZlibCompressor::setDictionary. (Lars
+    Francke via cdouglas)
+
+    HADOOP-3426. Fix/provide handling when DNS lookup fails on the loopback
+    address. Also cache the result of the lookup. (Steve Loughran via cdouglas)
+
+    HADOOP-5476. Close the underlying InputStream in SequenceFile::Reader when
+    the constructor throws an exception. (Michael Tamm via cdouglas)
+
+    HADOOP-5675. Do not launch a job if DistCp has no work to do. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-5737. Fixes a problem in the way the JobTracker used to talk to
+    other daemons like the NameNode to get the job's files. Also adds APIs
+    in the JobTracker to get the FileSystem objects as per the JobTracker's
+    configuration. (Amar Kamat via ddas) 
+
+    HADOOP-5648. Not able to generate gridmix.jar on the already compiled 
+    version of hadoop. (gkesavan)	
+
+    HADOOP-5808. Fix import never used javac warnings in hdfs. (szetszwo)
+
+    HADOOP-5203. TT's version build is too restrictive. (Rick Cox via sharad)
+
+    HADOOP-5818. Revert the renaming from FSNamesystem.checkSuperuserPrivilege
+    to checkAccess by HADOOP-5643.  (Amar Kamat via szetszwo)
+
+    HADOOP-5820. Fix findbugs warnings for http related codes in hdfs.
+    (szetszwo)
+
+    HADOOP-5822. Fix javac warnings in several dfs tests related to unnecessary
+    casts.  (Jakob Homan via szetszwo)
+
+    HADOOP-5842. Fix a few javac warnings under packages fs and util.
+    (Hairong Kuang via szetszwo)
+
+    HADOOP-5845. Build successful despite test failure on test-core target.
+    (sharad)
+
+    HADOOP-5314. Prevent unnecessary saving of the file system image during 
+    name-node startup. (Jakob Homan via shv)
+
+    HADOOP-5855. Fix javac warnings for DisallowedDatanodeException and
+    UnsupportedActionException.  (szetszwo)
+
+    HADOOP-5582. Fixes a problem in Hadoop Vaidya to do with reading
+    counters from job history files. (Suhas Gogate via ddas)
+
+    HADOOP-5829. Fix javac warnings found in ReplicationTargetChooser,
+    FSImage, Checkpointer, SecondaryNameNode and a few other hdfs classes.
+    (Suresh Srinivas via szetszwo)
+
+    HADOOP-5835. Fix findbugs warnings found in Block, DataNode, NameNode and
+    a few other hdfs classes.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5853. Undeprecate HttpServer.addInternalServlet method.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5801. Fixes the problem: If the hosts file is changed across restart
+    then it should be refreshed upon recovery so that the excluded hosts are 
+    lost and the maps are re-executed. (Amar Kamat via ddas)
+
+    HADOOP-5841. Resolve findbugs warnings in DistributedFileSystem,
+    DatanodeInfo, BlocksMap, DataNodeDescriptor.  (Jakob Homan via szetszwo)
+
+    HADOOP-5878. Fix import and Serializable javac warnings found in hdfs jsp.
+    (szetszwo)
+
+    HADOOP-5782. Revert a few formatting changes introduced in HADOOP-5015.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5687. NameNode throws NPE if fs.default.name is the default value.
+    (Philip Zeyliger via shv)
+
+    HADOOP-5867. Fix javac warnings found in NNBench and NNBenchWithoutMR.
+    (Konstantin Boudnik via szetszwo)
+    
+    HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
+    (Wang Xu via johan)
+
+    HADOOP-5847. Fixed failing Streaming unit tests (gkesavan) 
+
+    HADOOP-5252. Streaming overrides -inputformat option (Klaas Bosteels 
+    via sharad)
+
+    HADOOP-5710. Counter MAP_INPUT_BYTES missing from new mapreduce api. 
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5809. Fix job submission, broken by errant directory creation.
+    (Sreekanth Ramakrishnan and Jothi Padmanabhan via cdouglas)
+
+    HADOOP-5635. Change distributed cache to work with other distributed file
+    systems. (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5856. Fix "unsafe multithreaded use of DateFormat" findbugs warning
+    in DataBlockScanner.  (Kan Zhang via szetszwo)
+
+    HADOOP-4864. Fixes a problem to do with -libjars with multiple jars when
+    client and cluster reside on different OSs. (Amareshwari Sriramadasu via 
+    ddas)
+
+    HADOOP-5623. Fixes a problem to do with status messages getting overwritten
+    in streaming jobs. (Rick Cox and Jothi Padmanabhan via ddas)
+
+    HADOOP-5895. Fixes computation of count of merged bytes for logging.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5805. problem using top level s3 buckets as input/output 
+    directories. (Ian Nowland via tomwhite)
+   
+    HADOOP-5940. trunk eclipse-plugin build fails while trying to copy 
+    commons-cli jar from the lib dir (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5864. Fix DMI and OBL findbugs in packages hdfs and metrics.
+    (hairong)
+
+    HADOOP-5935. Fix Hudson's release audit warnings link is broken. 
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5947. Delete empty TestCombineFileInputFormat.java
+
+    HADOOP-5899. Move a log message in FSEditLog to the right place for
+    avoiding unnecessary log.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5944. Add Apache license header to BlockManager.java.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5891. SecondaryNamenode is able to converse with the NameNode 
+    even when the default value of dfs.http.address is not overridden.
+    (Todd Lipcon via dhruba)
+
+    HADOOP-5953. The isDirectory(..) and isFile(..) methods in KosmosFileSystem
+    should not be deprecated.  (szetszwo)
+
+    HADOOP-5954. Fix javac warnings in TestFileCreation, TestSmallBlock,
+    TestFileStatus, TestDFSShellGenericOptions, TestSeekBug and
+    TestDFSStartupVersions.  (szetszwo)
+
+    HADOOP-5956. Fix ivy dependency in hdfsproxy and capacity-scheduler.
+    (Giridharan Kesavan via szetszwo)
+
+    HADOOP-5836. Bug in S3N handling of directory markers using an object with
+    a trailing "/" causes jobs to fail. (Ian Nowland via tomwhite)
+
+    HADOOP-5861. s3n files are not getting split by default. (tomwhite)
+
+    HADOOP-5762. Fix a problem that DistCp does not copy empty directory.
+    (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5859. Fix "wait() or sleep() with locks held" findbugs warnings in
+    DFSClient.  (Kan Zhang via szetszwo)
+   
+    HADOOP-5457. Fix to continue to run builds even if contrib test fails
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5963. Remove an unnecessary exception catch in NNBench.  (Boris
+    Shkolnik via szetszwo)
+
+    HADOOP-5989. Fix streaming test failure.  (gkesavan)
+
+    HADOOP-5981. Fix a bug in HADOOP-2838 in parsing mapred.child.env.
+    (Amar Kamat via sharad)
+
+    HADOOP-5420. Fix LinuxTaskController to kill tasks using the process
+    groups they are launched with.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6031. Remove @author tags from Java source files.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5980. Fix LinuxTaskController so tasks get passed 
+    LD_LIBRARY_PATH and other environment variables.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4041. IsolationRunner does not work as documented.
+    (Philip Zeyliger via tomwhite)
+
+    HADOOP-6004. Fixes BlockLocation deserialization.  (Jakob Homan via
+    szetszwo)
+
+    HADOOP-6079. Serialize proxySource as DatanodeInfo in DataTransferProtocol.
+    (szetszwo)
+
+    HADOOP-6096. Fix Eclipse project and classpath files following project
+    split. (tomwhite)
+
+    HADOOP-6122. The greater-than operator in test-patch.sh should be "-gt",
+    not ">".  (szetszwo)
+
+    HADOOP-6114. Fix javadoc documentation for FileStatus.getLen.
+    (Dmitry Rzhevskiy via dhruba)
+
+    HADOOP-6131. A sysproperty should not be set unless the property 
+    is set on the ant command line in build.xml (hong tang via mahadev)
+
+    HADOOP-6137. Fix project specific test-patch requirements
+    (Giridharan Kesavan)
+
+    HADOOP-6138. Eliminate the deprecated warnings introduced by H-5438.
+    (He Yongqiang via szetszwo)
+
+    HADOOP-6132. RPC client create an extra connection because of incorrect
+    key for connection cache. (Kan Zhang via rangadi)
+
+    HADOOP-6123. Add missing classpaths in hadoop-config.sh.  (Sharad Agarwal
+    via szetszwo)
+
+    HADOOP-6172. Fix jar file names in hadoop-config.sh and include 
+    ${build.src} as a part of the source list in build.xml.  (Hong Tang via 
+    szetszwo)
+
+    HADOOP-6124. Fix javac warning detection in test-patch.sh.  (Giridharan
+    Kesavan via szetszwo)
+
+    HADOOP-6177. FSInputChecker.getPos() would return position greater 
+    than the file size. (Hong Tang via hairong)
+
+    HADOOP-6188. TestTrash uses java.io.File api but not hadoop FileSystem api.
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-6192. Fix Shell.getUlimitMemoryCommand to not rely on Map-Reduce
+    specific configs. (acmurthy) 
+
+    HADOOP-6103. Clones the classloader as part of Configuration clone.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-6152. Fix classpath variables in bin/hadoop-config.sh and some
+    other scripts.  (Aaron Kimball via szetszwo)
+
+    HADOOP-6215. Fix GenericOptionsParser to deal with -D options with '=' in
+    the value. (Amar Kamat via sharad)
+
+    HADOOP-6227. Fix Configuration to allow final parameters to be set to null
+    and prevent them from being overridden.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6199. Move io.map.skip.index property to core-default from mapred.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-6229. Attempt to make a directory under an existing file on
+    LocalFileSystem should throw an Exception. (Boris Shkolnik via tomwhite)
+
+    HADOOP-6243. Fix a NullPointerException in processing deprecated keys.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6009. S3N listStatus incorrectly returns null instead of empty
+    array when called on empty root. (Ian Nowland via tomwhite)
+
+    HADOOP-6181.  Fix .eclipse.templates/.classpath for avro and jets3t jar
+    files.  (Carlos Valiente via szetszwo)
+
+    HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the
+    header would cause the reader to read the sync marker as a record. (Jay
+    Booth via cdouglas)
+
+    HADOOP-6250. Modify test-patch to delete copied XML files before running
+    patch build. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-6257. Two TestFileSystem classes are confusing
+    hadoop-hdfs-hdfwithmr. (Philip Zeyliger via tomwhite)
+
+    HADOOP-6151. Added an input filter to all of the http servlets that quotes
+    html characters in the parameters, to prevent cross site scripting 
+    attacks. (omalley)
+
+    HADOOP-6274. Fix TestLocalFSFileContextMainOperations test failure.
+    (Gary Murry via suresh).
+
+    HADOOP-6281. Avoid null pointer exceptions when the jsps don't have
+    parameters (omalley)
+
+    HADOOP-6285. Fix the result type of the getParameterMap method in the
+    HttpServer.QuotingInputFilter. (omalley)
+
+    HADOOP-6286. Fix bugs in related to URI handling in glob methods in 
+    FileContext. (Boris Shkolnik via suresh)
+
+    HADOOP-6292. Update native libraries guide. (Corinne Chandel via cdouglas)
+
+    HADOOP-6327. FileContext tests should not use /tmp and should clean up
+    files.  (Sanjay Radia via szetszwo)
+
+    HADOOP-6318. Upgrade to Avro 1.2.0.  (cutting)
+
+    HADOOP-6334.  Fix GenericOptionsParser to understand URI for -files,
+    -libjars and -archives options and fix Path to support URI with fragment.
+    (Amareshwari Sriramadasu via szetszwo)
+
+    HADOOP-6344. Fix rm and rmr immediately deleting files rather than sending
+    them to trash when a user is over quota. (Jakob Homan via suresh)
+
+    HADOOP-6347. run-test-core-fault-inject runs a test case twice if
+    -Dtestcase is set (cos)
+
+    HADOOP-6375. Sync documentation for FsShell du with its implementation.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6441. Protect web ui from cross site scripting attacks (XSS) on
+    the host http header and using encoded utf-7. (omalley)
+
+    HADOOP-6451. Fix build to run contrib unit tests. (Tom White via cdouglas)
+
+    HADOOP-6374. JUnit tests should never depend on anything in conf.
+    (Anatoli Fomenko via cos)
+
+    HADOOP-6290. Prevent duplicate slf4j-simple jar via Avro's classpath.
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-6293. Fix FsShell -text to work on filesystems other than the
+    default. (cdouglas)
+
+    HADOOP-6341. Fix test-patch.sh for checkTests function. (gkesavan)
+
+    HADOOP-6314. Fix "fs -help" for the "-count" command.  (Ravi Phulari via
+    szetszwo)
+
+    HADOOP-6405. Update Eclipse configuration to match changes to Ivy
+    configuration (Edwin Chan via cos)
+
+    HADOOP-6411. Remove deprecated file src/test/hadoop-site.xml. (cos)
+
+    HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
+    IllegalArgumentException is thrown (cos)
+
+    HADOOP-6254. Slow reads cause s3n to fail with SocketTimeoutException.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-6428. HttpServer sleeps with negative values. (cos)
+
+    HADOOP-6414. Add command line help for -expunge command.
+    (Ravi Phulari via tomwhite)
+
+    HADOOP-6391. Classpath should not be part of command line arguments.
+    (Cristian Ivascu via tomwhite)
+
+    HADOOP-6462. Target "compile" does not exist in contrib/cloud. (tomwhite)
+
+    HADOOP-6402. testConf.xsl is not well-formed XML. (Steve Loughran
+    via tomwhite)
+
+    HADOOP-6489. Fix 3 findbugs warnings. (Erik Steffl via suresh)
+
+    HADOOP-6517. Fix UserGroupInformation so that tokens are saved/retrieved
+    to/from the embedded Subject (Owen O'Malley & Kan Zhang via ddas)
+
+    HADOOP-6538. Sets hadoop.security.authentication to simple by default.
+    (ddas)
+
+    HADOOP-6540. Contrib unit tests have invalid XML for core-site, etc.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6521. User specified umask using deprecated dfs.umask must override
+    server configured using new dfs.umaskmode for backward compatibility.
+    (suresh)
+    
+    HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting)
+
+    HADOOP-6505. Use tr rather than sed to effect literal substitution in the
+    build script. (Allen Wittenauer via cdouglas)
+
+    HADOOP-6548. Replace mortbay imports with commons logging. (cdouglas)
+
+    HADOOP-6560. Handle invalid har:// uri in HarFileSystem.  (szetszwo)
+
+    HADOOP-6549. TestDoAsEffectiveUser should use ip address of the host
+     for superuser ip check(jnp via boryas)
+
+    HADOOP-6570. RPC#stopProxy throws NPE if getProxyEngine(proxy) returns
+    null. (hairong)
+
+    HADOOP-6558. Return null in HarFileSystem.getFileChecksum(..) since no
+    checksum algorithm is implemented.  (szetszwo)
+
+    HADOOP-6572. Makes sure that SASL encryption and the push to the responder
+    queue for the RPC response happen atomically. (Kan Zhang via ddas)
+
+    HADOOP-6545. Changes the Key for the FileSystem cache to be UGI (ddas)
+
+    HADOOP-6609. Fixed deadlock in RPC by replacing shared static 
+    DataOutputBuffer in the UTF8 class with a thread local variable. (omalley)
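+
+    The pattern behind the fix, reduced to a minimal sketch with a plain
+    ByteArrayOutputStream standing in for Hadoop's DataOutputBuffer:
+
+      import java.io.ByteArrayOutputStream;
+
+      /** Illustrative only: replace a shared static buffer with a thread-local one. */
+      public class ThreadLocalBufferSketch {
+        // Before: a single static buffer shared by all threads, which forces
+        // locking around every use and can deadlock against other locks.
+        private static final ThreadLocal<ByteArrayOutputStream> BUF =
+            ThreadLocal.withInitial(ByteArrayOutputStream::new);
+
+        static byte[] encode(String s) {
+          ByteArrayOutputStream out = BUF.get();  // per-thread instance, no locking
+          out.reset();
+          byte[] bytes = s.getBytes();
+          out.write(bytes, 0, bytes.length);
+          return out.toByteArray();
+        }
+
+        public static void main(String[] args) throws InterruptedException {
+          Runnable r = () -> System.out.println(encode("hello").length);
+          Thread t1 = new Thread(r), t2 = new Thread(r);
+          t1.start(); t2.start();
+          t1.join(); t2.join();
+        }
+      }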
+
+    HADOOP-6504. Invalid example in the documentation of
+    org.apache.hadoop.util.Tool. (Benoit Sigoure via tomwhite)
+
+    HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat
+    via tomwhite)
+
+    HADOOP-6593. TextRecordInputStream doesn't close SequenceFile.Reader.
+    (Chase Bradford via tomwhite)
+
+    HADOOP-6175. Incorrect version compilation with es_ES.ISO8859-15 locale
+    on Solaris 10. (Urko Benito via tomwhite)
+
+    HADOOP-6645.  Bugs on listStatus for HarFileSystem (rodrigo via mahadev)
+
+    HADOOP-6645. Re: Bugs on listStatus for HarFileSystem (rodrigo via
+    mahadev)
+
+    HADOOP-6654. Fix code example in WritableComparable javadoc.  (Tom White
+    via szetszwo)
+
+    HADOOP-6640. FileSystem.get() does RPC retries within a static
+    synchronized block. (hairong)
+
+    HADOOP-6691. TestFileSystemCaching sometimes hangs. (hairong)
+
+    HADOOP-6507. Hadoop Common Docs - delete 3 doc files that do not belong
+    under Common. (Corinne Chandel via tomwhite)
+
+    HADOOP-6439. Fixes handling of deprecated keys to follow order in which
+    keys are defined. (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6690. FilterFileSystem correctly handles setTimes call.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6703. Prevent renaming a file, directory or symbolic link to
+    itself. (Eli Collins via suresh)
+
+    HADOOP-6710. Symbolic umask for file creation is not conformant with posix.
+    (suresh)
+    
+    HADOOP-6719. Insert all missing methods in FilterFs.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6724. IPC doesn't properly handle IOEs thrown by socket factory.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6722. NetUtils.connect should check that it hasn't connected a socket
+    to itself. (Todd Lipcon via tomwhite)
+
+    HADOOP-6634. Fix AccessControlList to use short names to verify access 
+    control. (Vinod Kumar Vavilapalli via sharad)
+
+    HADOOP-6709. Re-instate deprecated FileSystem methods that were removed
+    after 0.20. (tomwhite)
+ 
+    HADOOP-6630. hadoop-config.sh fails to get executed if hadoop wrapper
+    scripts are in path. (Allen Wittenauer via tomwhite)
+
+    HADOOP-6742. Add methods from HADOOP-6709 to TestFilterFileSystem.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6727. Remove UnresolvedLinkException from public FileContext APIs.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6631. Fix FileUtil.fullyDelete() to continue deleting other files 
+    despite failure at any level. (Contributed by Ravi Gummadi and 
+    Vinod Kumar Vavilapalli)
+
+    HADOOP-6723.  Unchecked exceptions thrown in IPC Connection should not
+    orphan clients.  (Todd Lipcon via tomwhite)
+
+    HADOOP-6404. Rename the generated artifacts to common instead of core.
+    (tomwhite)
+
+    HADOOP-6461.  Webapps aren't located correctly post-split.
+    (Todd Lipcon and Steve Loughran via tomwhite)
+
+    HADOOP-6826.  Revert FileSystem create method that takes CreateFlags.
+    (tomwhite)
+
+    HADOOP-6800.  Harmonize JAR library versions.  (tomwhite)
+
+    HADOOP-6847. Problem staging 0.21.0 artifacts to Apache Nexus Maven
+    Repository (Giridharan Kesavan via cos)
+
+    HADOOP-6819. [Herriot] Shell command for getting the new exceptions in
+    the logs returning exitcode 1 after executing successfully. (Vinay Thota
+    via cos)
+
+    HADOOP-6839. [Herriot] Implement a functionality for getting the user list
+    for creating proxy users. (Vinay Thota via cos)
+
+    HADOOP-6836. [Herriot]: Generic method for adding/modifying the attributes
+    for new configuration. (Vinay Thota via cos)
+
+    HADOOP-6860. 'compile-fault-inject' should never be called directly.
+    (Konstantin Boudnik)
+
+    HADOOP-6790. Instrumented (Herriot) build uses too wide mask to include
+    aspect files. (Konstantin Boudnik)
+
+    HADOOP-6875. [Herriot] Cleanup of temp. configurations is needed upon
+    restart of a cluster (Vinay Thota via cos)
+
+Release 0.20.3 - Unreleased
+
+  NEW FEATURES
+
+    HADOOP-6637. Benchmark for establishing RPC session. (shv)
+
+  BUG FIXES
+
+    HADOOP-6760. WebServer shouldn't increase port number in case of negative
+    port setting caused by Jetty's race (cos)
+    
+    HADOOP-6881. Make WritableComparator initialize classes when
+    looking for their raw comparator, as classes often register raw
+    comparators in initializers, which are no longer automatically run
+    in Java 6 when a class is referenced. (cutting via omalley)
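+
+    A minimal sketch of why forcing initialization matters (the registry and
+    class names below are illustrative, not the WritableComparator internals):
+    a comparator registered from a static initializer is only visible once the
+    class has actually been initialized.
+
+      import java.util.Map;
+      import java.util.concurrent.ConcurrentHashMap;
+
+      /** Illustrative only: a registry populated from static initializers. */
+      public class RawComparatorRegistrySketch {
+        static final Map<Class<?>, String> REGISTRY = new ConcurrentHashMap<>();
+
+        static class MyKey {
+          static {
+            // Runs only when MyKey is initialized, not when merely referenced.
+            REGISTRY.put(MyKey.class, "MyKey raw comparator");
+          }
+        }
+
+        static String lookup(Class<?> c) throws Exception {
+          String cmp = REGISTRY.get(c);
+          if (cmp == null) {
+            // Force initialization so the static registration runs, then retry.
+            Class.forName(c.getName(), true, c.getClassLoader());
+            cmp = REGISTRY.get(c);
+          }
+          return cmp;
+        }
+
+        public static void main(String[] args) throws Exception {
+          // The class literal alone does not run MyKey's static initializer.
+          System.out.println(REGISTRY.get(MyKey.class));  // null
+          System.out.println(lookup(MyKey.class));        // "MyKey raw comparator"
+        }
+      }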
+
+Release 0.20.2 - 2010-2-16
+
+  NEW FEATURES
+
+    HADOOP-6218. Adds a feature where TFile can be split by Record
+    Sequence number. (Hong Tang and Raghu Angadi via ddas)
+
+  BUG FIXES
+
+    HADOOP-6231. Allow caching of filesystem instances to be disabled on a
+    per-instance basis. (tomwhite)
+
+    HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
+    is used as job InputFormat. (Amareshwari Sriramadasu via dhruba)
+
+    HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
+    count at the start of each block in Hadoop archives. (Ben Slusky, Tom
+    White, and Mahadev Konar via cdouglas)
+
+    HADOOP-6269. Fix threading issue with defaultResource in Configuration.
+    (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
+    server on exceeding maximum response size to free up Java heap. (suresh)
+
+    HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+    GzipCodec. (Aaron Kimball via cdouglas)
+
+    HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
+    hairong via hairong)
+
+  IMPROVEMENTS
+
+    HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
+    via tomwhite)
+
+    HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-1849. Add undocumented configuration parameter for per handler 
+    call queue size in IPC Server. (shv)
+
+Release 0.20.1 - 2009-09-01
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-5726. Remove pre-emption from capacity scheduler code base.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5881. Simplify memory monitoring and scheduling related
+    configuration. (Vinod Kumar Vavilapalli via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-6080. Introduce -skipTrash option to rm and rmr.
+    (Jakob Homan via shv)
+
+    HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-5711. Change Namenode file close log to info. (szetszwo)
+
+    HADOOP-5736. Update the capacity scheduler documentation for features
+    like memory based scheduling, job initialization and removal of pre-emption.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4372. Improves the way history filenames are obtained and manipulated.
+    (Amar Kamat via ddas)
+
+    HADOOP-5897. Add name-node metrics to capture java heap usage.
+    (Suresh Srinivas via shv)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer a concrete class
+    instead of abstract. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5646. Fixes a problem in TestQueueCapacities.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
+    Das via hairong)
+
+    HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
+    (hairong)
+
+    HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
+    (Nicholas) Sze via cdouglas)
+
+    HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat 
+    and -touchz options.  (Ravi Phulari via szetszwo)
+
+    HADOOP-5718. Remove the check for the default queue in capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5719. Remove jobs that failed initialization from the waiting queue
+    in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
+    kills itself if it ever discovers that the port to which jetty is actually
+    bound is invalid (-1). (ddas)
+
+    HADOOP-5349. Fixes a problem in LocalDirAllocator to check for the return
+    path value that is returned for the case where the file we want to write
+    is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5636. Prevents a job from going to RUNNING state after it has been
+    KILLED (this used to happen when the SetupTask would come back with a 
+    success after the job has been killed). (Amar Kamat via ddas)
+
+    HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
+    based scheduling code when jobs get retired. (yhemanth)
+
+    HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
+    MiniMRCluster. (yhemanth)
+
+    HADOOP-4981. Fix capacity scheduler to schedule speculative tasks 
+    correctly in the presence of High RAM jobs.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5210. Solves a problem in the progress report of the reduce task.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5850. Fixes a problem to do with not being able to run jobs with
+    0 maps/reduces. (Vinod K V via ddas)
+
+    HADOOP-4626. Correct the API links in hdfs forrest doc so that they
+    point to the same version of hadoop.  (szetszwo)
+
+    HADOOP-5883. Fixed tasktracker memory monitoring to account for
+    momentary spurts in memory usage due to java's fork() model.
+    (yhemanth)
+
+    HADOOP-5539. Fixes a problem to do with not preserving intermediate
+    output compression for merged data.
+    (Jothi Padmanabhan and Billy Pearson via ddas)
+
+    HADOOP-5932. Fixes a problem in capacity scheduler in computing
+    available memory on a tasktracker.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5908. Fixes a problem to do with ArithmeticException in the 
+    JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
+
+    HADOOP-5924. Fixes a corner case problem to do with job recovery with
+    empty history files. Also, after a JT restart, sends KillTaskAction to 
+    tasks that report back but the corresponding job hasn't been initialized
+    yet. (Amar Kamat via ddas)
+
+    HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5746. Fixes a corner case problem in Streaming, where if an exception
+    happens in MROutputThread after the last call to the map/reduce method, the 
+    exception goes undetected. (Amar Kamat via ddas)
+
+    HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
+    take more slots. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5937. Correct a safemode message in FSNamesystem.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
+    causing TestQueueCapacities to fail.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never used
+    to come up due to a system file creation on JobTracker's system-dir failing. 
+    This problem would sometimes show up only when the FS for the system-dir 
+    (usually HDFS) is started at nearly the same time as the JobTracker. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5920. Fixes a testcase failure for TestJobHistory. 
+    (Amar Kamat via ddas)
+
+    HADOOP-6139. Fix the FsShell help messages for rm and rmr.  (Jakob Homan
+    via szetszwo)
+
+    HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
+    (Jakob Homan via szetszwo)
+
+    HADOOP-6150. Users should be able to instantiate comparator using TFile
+    API. (Hong Tang via rangadi)
+
+Release 0.20.0 - 2009-04-15
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4210. Fix findbugs warnings for equals implementations of mapred ID
+    classes. Removed public, static ID::read and ID::forName; made ID an
+    abstract class. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4253. Fix various warnings generated by findbugs. 
+    Following deprecated methods in RawLocalFileSystem are removed:
+  	  public String getName()
+  	  public void lock(Path p, boolean shared)
+  	  public void release(Path p) 
+    (Suresh Srinivas via johan)
+
+    HADOOP-4618. Move http server from FSNamesystem into NameNode.
+    FSNamesystem.getNameNodeInfoPort() is removed.
+    FSNamesystem.getDFSNameNodeMachine() and FSNamesystem.getDFSNameNodePort()
+      replaced by FSNamesystem.getDFSNameNodeAddress().
+    NameNode(bindAddress, conf) is removed.
+    (shv)
+
+    HADOOP-4567. GetFileBlockLocations returns the NetworkTopology
+    information of the machines where the blocks reside. (dhruba)
+
+    HADOOP-4435. The JobTracker WebUI displays the amount of heap memory 
+    in use. (dhruba)
+
+    HADOOP-4628. Move Hive into a standalone subproject. (omalley)
+
+    HADOOP-4188. Removes task's dependency on concrete filesystems.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-1650. Upgrade to Jetty 6. (cdouglas)
+
+    HADOOP-3986. Remove static Configuration from JobClient. (Amareshwari
+    Sriramadasu via cdouglas)
+      JobClient::setCommandLineConfig is removed
+      JobClient::getCommandLineConfig is removed
+      JobShell, TestJobShell classes are removed
+
+    HADOOP-4422. S3 file systems should not create bucket.
+    (David Phillips via tomwhite)
+
+    HADOOP-4035. Support memory based scheduling in capacity scheduler.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3497. Fix bug in overly restrictive file globbing with a
+    PathFilter. (tomwhite)
+
+    HADOOP-4445. Replace running task counts with running task
+    percentage in capacity scheduler UI. (Sreekanth Ramakrishnan via
+    yhemanth)
+
+    HADOOP-4631. Splits the configuration into three parts - one for core,
+    one for mapred and the last one for HDFS. (Sharad Agarwal via cdouglas)
+
+    HADOOP-3344. Fix libhdfs build to use autoconf and build the same
+    architecture (32 vs 64 bit) of the JVM running Ant.  The libraries for
+    pipes, utils, and libhdfs are now all in c++/<os_osarch_jvmdatamodel>/lib. 
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-4874. Remove LZO codec because of licensing issues. (omalley)
+
+    HADOOP-4970. The full path name of a file is preserved inside Trash.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4103. NameNode keeps a count of missing blocks. It warns on 
+    WebUI if there are such blocks. '-report' and '-metaSave' have extra
+    info to track such blocks. (Raghu Angadi)
+
+    HADOOP-4783. Change permissions on history files on the jobtracker
+    to be only group readable instead of world readable.
+    (Amareshwari Sriramadasu via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
+    Includes client authentication via user certificates and config-based
+    access control. (Kan Zhang via cdouglas)
+
+    HADOOP-4661. Add DistCh, a new tool for distributed ch{mod,own,grp}.
+    (szetszwo)
+
+    HADOOP-4709. Add several new features and bug fixes to Chukwa.
+      Added Hadoop Infrastructure Care Center (UI for visualizing data
+        collected by Chukwa)
+      Added FileAdaptor for streaming small file in one chunk
+      Added compression to archive and demux output
+      Added unit tests and validation for agent, collector, and demux map 
+        reduce job
+      Added database loader for loading demux output (sequence file) to jdbc 
+        connected database
+      Added algorithm to distribute collector load more evenly
+    (Jerome Boulon, Eric Yang, Andy Konwinski, Ariel Rabkin via cdouglas)
+
+    HADOOP-4179. Add Vaidya tool to analyze map/reduce job logs for performance
+    problems. (Suhas Gogate via omalley)
+
+    HADOOP-4029. Add NameNode storage information to the dfshealth page and
+    move DataNode information to a separated page. (Boris Shkolnik via
+    szetszwo)
+
+    HADOOP-4348. Add service-level authorization for Hadoop. (acmurthy) 
+
+    HADOOP-4826. Introduce admin command saveNamespace. (shv)
+
+    HADOOP-3063. BloomMapFile - fail-fast version of MapFile for sparsely
+    populated key space (Andrzej Bialecki via stack)
+
+    HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally,
+    the old code should work without problem. The new api is in 
+    org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred
+    are deprecated. Differences in the new API:
+      1. All of the methods take Context objects that allow us to add new
+         methods without breaking compatibility.
+      2. Mapper and Reducer now have a "run" method that is called once and
+         contains the control loop for the task, which lets applications
+         replace it.
+      3. Mapper and Reducer by default are Identity Mapper and Reducer.
+      4. The FileOutputFormats use part-r-00000 for the output of reduce 0 and
+         part-m-00000 for the output of map 0.
+      5. The reduce grouping comparator now uses the raw compare instead of 
+         object compare.
+      6. The number of maps in FileInputFormat is controlled by min and max
+         split size rather than min size and the desired number of maps.
+      (omalley)
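+
+    A small example of the shape of the new API (a word-count style Mapper
+    written against org.apache.hadoop.mapreduce, shown for illustration):
+
+      import java.io.IOException;
+      import java.util.StringTokenizer;
+
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.LongWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapreduce.Mapper;
+
+      /** Mapper using the new org.apache.hadoop.mapreduce API. */
+      public class TokenCounterMapper
+          extends Mapper<LongWritable, Text, Text, IntWritable> {
+
+        private static final IntWritable ONE = new IntWritable(1);
+        private final Text word = new Text();
+
+        @Override
+        protected void map(LongWritable key, Text value, Context context)
+            throws IOException, InterruptedException {
+          // The Context carries output collection and job configuration, so
+          // new methods can be added without breaking compatibility.
+          StringTokenizer it = new StringTokenizer(value.toString());
+          while (it.hasMoreTokens()) {
+            word.set(it.nextToken());
+            context.write(word, ONE);
+          }
+        }
+      }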
+    
+    HADOOP-3305.  Use Ivy to manage dependencies.  (Giridharan Kesavan
+    and Steve Loughran via cutting)
+
+  IMPROVEMENTS
+
+    HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via 
+    zshao)
+
+    HADOOP-4234. Fix KFS "glue" layer to allow applications to interface
+    with multiple KFS metaservers. (Sriram Rao via lohit)
+
+    HADOOP-4245. Update to latest version of KFS "glue" library jar. 
+    (Sriram Rao via lohit)
+
+    HADOOP-4244. Change test-patch.sh to check the Eclipse classpath no matter
+    whether it is run by Hudson or not. (szetszwo)
+
+    HADOOP-3180. Add name of missing class to WritableName.getClass 
+    IOException. (Pete Wyckoff via omalley)
+
+    HADOOP-4178. Make the capacity scheduler's default values configurable.
+    (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4262. Generate better error message when client exception has null
+    message. (stevel via omalley)
+
+    HADOOP-4226. Refactor and document LineReader to make it more readily
+    understandable. (Yuri Pradkin via cdouglas)
+    
+    HADOOP-4238. When listing jobs, if scheduling information isn't available 
+    print NA instead of empty output. (Sreekanth Ramakrishnan via johan)
+
+    HADOOP-4284. Support filters that apply to all requests, or global filters,
+    to HttpServer. (Kan Zhang via cdouglas)
+    
+    HADOOP-4276. Improve the hashing functions and deserialization of the 
+    mapred ID classes. (omalley)
+
+    HADOOP-4485. Add a compile-native ant task, as a shorthand. (enis)
+
+    HADOOP-4454. Allow # comments in slaves file. (Rama Ramasamy via omalley)
+
+    HADOOP-3461. Remove hdfs.StringBytesWritable. (szetszwo)
+
+    HADOOP-4437. Use Halton sequence instead of java.util.Random in 
+    PiEstimator. (szetszwo)
+
+    HADOOP-4572. Change INode and its sub-classes to package private. 
+    (szetszwo)
+
+    HADOOP-4187. Does a runtime lookup for JobConf/JobConfigurable, and if 
+    found, invokes the appropriate configure method. (Sharad Agarwal via ddas)
+
+    HADOOP-4453. Improve ssl configuration and handling in HsftpFileSystem,
+    particularly when used with DistCp. (Kan Zhang via cdouglas)
+
+    HADOOP-4583. Several code optimizations in HDFS.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-3923. Remove org.apache.hadoop.mapred.StatusHttpServer.  (szetszwo)
+    
+    HADOOP-4622. Explicitly specify interpretor for non-native
+    pipes binaries. (Fredrik Hedberg via johan)
+    
+    HADOOP-4505. Add a unit test to test faulty setup task and cleanup
+    task killing the job. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4608. Don't print a stack trace when the example driver gets an
+    unknown program to run. (Edward Yoon via omalley)
+
+    HADOOP-4645. Package HdfsProxy contrib project without the extra level
+    of directories. (Kan Zhang via omalley)
+
+    HADOOP-4126. Allow access to HDFS web UI on EC2 (tomwhite via omalley)
+
+    HADOOP-4612. Removes RunJar's dependency on JobClient.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4185. Adds setVerifyChecksum() method to FileSystem.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4523. Prevent too many tasks scheduled on a node from bringing
+    it down by monitoring for cumulative memory usage across tasks.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4640. Adds an input format that can split lzo compressed
+    text files. (johan)
+    
+    HADOOP-4666. Launch reduces only after a few maps have run in the 
+    Fair Scheduler. (Matei Zaharia via johan)    
+
+    HADOOP-4339. Remove redundant calls from FileSystem/FsShell when
+    generating/processing ContentSummary. (David Phillips via cdouglas)
+
+    HADOOP-2774. Add counters tracking records spilled to disk in MapTask and
+    ReduceTask. (Ravi Gummadi via cdouglas)
+
+    HADOOP-4513. Initialize jobs asynchronously in the capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4649. Improve abstraction for spill indices. (cdouglas)
+
+    HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. (Runping
+    Qi via cdouglas)
+
+    HADOOP-4708. Add support for dfsadmin commands in TestCLI. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4758. Add a splitter for metrics contexts to support more than one
+    type of collector. (cdouglas)
+
+    HADOOP-4722. Add tests for dfsadmin quota error messages. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4690.  fuse-dfs - create source file/function + utils + config +
+    main source files. (pete wyckoff via mahadev)
+
+    HADOOP-3750. Fix and enforce module dependencies. (Sharad Agarwal via
+    tomwhite)
+
+    HADOOP-4747. Speed up FsShell::ls by removing redundant calls to the
+    filesystem. (David Phillips via cdouglas)
+
+    HADOOP-4305. Improves the blacklisting strategy, whereby, tasktrackers
+    that are blacklisted are not given tasks to run from other jobs, subject
+    to the following conditions (all must be met):
+    1) The TaskTracker has been blacklisted by at least 4 jobs (configurable)
+    2) The TaskTracker has been blacklisted 50% more number of times than
+       the average (configurable)
+    3) The cluster has less than 50% trackers blacklisted
+    Once in 24 hours, a TaskTracker blacklisted for all jobs is given a chance.
+    Restarting the TaskTracker moves it out of the blacklist.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4688. Modify the MiniMRDFSSort unit test to spill multiple times,
+    exercising the map-side merge code. (cdouglas)
+
+    HADOOP-4737. Adds the KILLED notification when jobs get killed.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4728. Add a test exercising different namenode configurations.
+    (Boris Shkolnik via cdouglas)
+
+    HADOOP-4807. Adds JobClient commands to get the active/blacklisted tracker
+    names. Also adds commands to display running/completed task attempt IDs. 
+    (ddas)
+
+    HADOOP-4699. Remove checksum validation from map output servlet. (cdouglas)
+
+    HADOOP-4838. Added a registry to automate metrics and mbeans management.
+    (Sanjay Radia via acmurthy) 
+
+    HADOOP-3136. Fixed the default scheduler to assign multiple tasks to each 
+    tasktracker per heartbeat, when feasible. To ensure locality isn't hurt 
+    too badly, the scheduler will not assign more than one off-switch task per
+    heartbeat. The heartbeat interval is also halved since the task-tracker is 
+    fixed to no longer send out heartbeats on each task completion. A 
+    slow-start for scheduling reduces is introduced to ensure that reduces 
+    aren't started till sufficient number of maps are done, else reduces of 
+    jobs whose maps aren't scheduled might swamp the cluster.
+    Configuration changes to mapred-default.xml:
+      add mapred.reduce.slowstart.completed.maps 
+    (acmurthy)
+
+    HADOOP-4545. Add example and test case of secondary sort for the reduce.
+    (omalley)
+
+    HADOOP-4753. Refactor gridmix2 to reduce code duplication. (cdouglas)
+
+    HADOOP-4909. Fix Javadoc and make some of the API more consistent in their
+    use of the JobContext instead of Configuration. (omalley)
+
+    HADOOP-4920.  Stop storing Forrest output in Subversion. (cutting)
+
+    HADOOP-4948. Add parameters java5.home and forrest.home to the ant commands
+    in test-patch.sh.  (Giridharan Kesavan via szetszwo)
+
+    HADOOP-4830. Add end-to-end test cases for testing queue capacities.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4980. Improve code layout of capacity scheduler to make it 
+    easier to fix some blocker bugs. (Vivek Ratan via yhemanth)
+
+    HADOOP-4916. Make user/location of Chukwa installation configurable by an
+    external properties file. (Eric Yang via cdouglas)
+
+    HADOOP-4950. Make the CompressorStream, DecompressorStream, 
+    BlockCompressorStream, and BlockDecompressorStream public to facilitate 
+    non-Hadoop codecs. (omalley)
+
+    HADOOP-4843. Collect job history and configuration in Chukwa. (Eric Yang
+    via cdouglas)
+
+    HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric
+    Yang via cdouglas)
+    
+    HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4939. Adds a test that would inject random failures for tasks in 
+    large jobs and would also inject TaskTracker failures. (ddas)
+
+    HADOOP-4944. A configuration file can include other configuration
+    files. (Rama Ramasamy via dhruba)
+
+    HADOOP-4804. Provide Forrest documentation for the Fair Scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5248. A testcase that checks for the existence of job directory
+    after the job completes. Fails if it exists. (ddas)
+
+    HADOOP-4664. Introduces multiple job initialization threads, where the 
+    number of threads is configurable via mapred.jobinit.threads.
+    (Matei Zaharia and Jothi Padmanabhan via ddas)
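+
+    For illustration, a minimal sketch of raising the thread count in
+    mapred-site.xml (the value 4 is an arbitrary example, not a stated
+    default):
+
+      <property>
+        <name>mapred.jobinit.threads</name>
+        <value>4</value>
+      </property>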
+
+    HADOOP-4191. Adds a testcase for JobHistory. (Ravi Gummadi via ddas)
+
+    HADOOP-5466. Change documentation CSS style for headers and code. (Corinne
+    Chandel via szetszwo)
+
+    HADOOP-5275. Add ivy directory and files to built tar.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5468. Add sub-menus to forrest documentation and make some minor
+    edits.  (Corinne Chandel via szetszwo)
+
+    HADOOP-5437. Fix TestMiniMRDFSSort to properly test jvm-reuse. (omalley)
+
+    HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT 
+    JobHistory tag. (Ravi Gummadi via ddas)
+
+  OPTIMIZATIONS
+
+    HADOOP-3293. Fixes FileInputFormat to provide locations for splits
+    based on the rack/host that has the most number of bytes.
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4683. Fixes Reduce shuffle scheduler to invoke
+    getMapCompletionEvents in a separate thread. (Jothi Padmanabhan
+    via ddas)
+
+  BUG FIXES
+
+    HADOOP-4204. Fix findbugs warnings related to unused variables, naive
+    Number subclass instantiation, Map iteration, and badly scoped inner
+    classes. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4207. Update derby jar file to release 10.4.2 release.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4325. SocketInputStream.read() should return -1 in case of EOF.
+    (Raghu Angadi)
+
+    HADOOP-4408. FsAction functions need not create new objects. (cdouglas)
+
+    HADOOP-4440.  TestJobInProgressListener tests for jobs killed in queued 
+    state. (Amar Kamat via ddas)
+
+    HADOOP-4346. Implement blocking connect so that Hadoop is not affected
+    by selector problem with JDK default implementation. (Raghu Angadi)
+
+    HADOOP-4388. If there are invalid blocks in the transfer list, Datanode
+    should handle them and keep transferring the remaining blocks.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4587. Fix a typo in Mapper javadoc.  (Koji Noguchi via szetszwo)
+
+    HADOOP-4530. In fsck, HttpServletResponse sendError fails with
+    IllegalStateException. (hairong)
+
+    HADOOP-4377. Fix a race condition in directory creation in
+    NativeS3FileSystem. (David Phillips via cdouglas)
+
+    HADOOP-4621. Fix javadoc warnings caused by duplicate jars. (Kan Zhang via
+    cdouglas)
+
+    HADOOP-4566. Deploy new hive code to support more types.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4571. Add chukwa conf files to svn:ignore list. (Eric Yang via
+    szetszwo)
+
+    HADOOP-4589. Correct PiEstimator output messages and improve the code
+    readability. (szetszwo)
+
+    HADOOP-4650. Correct a mismatch between the default value of
+    local.cache.size in the config and the source. (Jeff Hammerbacher via
+    cdouglas)
+
+    HADOOP-4606. Fix cygpath error if the log directory does not exist.
+    (szetszwo via omalley)
+
+    HADOOP-4141. Fix bug in ScriptBasedMapping causing potential infinite
+    loop on misconfigured hadoop-site. (Aaron Kimball via tomwhite)
+
+    HADOOP-4691. Correct a link in the javadoc of IndexedSortable. (szetszwo)
+
+    HADOOP-4598. '-setrep' command skips under-replicated blocks. (hairong)
+
+    HADOOP-4429. Set defaults for user, group in UnixUserGroupInformation so
+    login fails more predictably when misconfigured. (Alex Loddengaard via
+    cdouglas)
+
+    HADOOP-4676. Fix broken URL in blacklisted tasktrackers page. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-3422. Ganglia counter metrics are all reported with the metric
+    name "value", so the counter values cannot be seen. (Jason Attributor
+    and Brian Bockelman via stack)
+
+    HADOOP-4704. Fix javadoc typos "the the". (szetszwo)
+
+    HADOOP-4677. Fix semantics of FileSystem::getBlockLocations to return
+    meaningful values. (Hong Tang via cdouglas)
+
+    HADOOP-4669. Use correct operator when evaluating whether access time is
+    enabled (Dhruba Borthakur via cdouglas)
+
+    HADOOP-4732. Pass connection and read timeouts in the correct order when
+    setting up fetch in reduce. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4558. Fix capacity reclamation in capacity scheduler.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4770. Fix rungridmix_2 script to work with RunJar. (cdouglas)
+
+    HADOOP-4738. When using git, the saveVersion script will use only the
+    commit hash for the version and not the message, which requires escaping.
+    (cdouglas)
+
+    HADOOP-4576. Show pending job count instead of task count in the UI per
+    queue in capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4623. Maintain running tasks even if speculative execution is off.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4786. Fix broken compilation error in 
+    TestTrackerBlacklistAcrossJobs. (yhemanth)
+
+    HADOOP-4785. Fixes the JobTracker heartbeat to not make two calls to 
+    System.currentTimeMillis(). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4792. Add generated Chukwa configuration files to version control
+    ignore lists. (cdouglas)
+
+    HADOOP-4796. Fix Chukwa test configuration, remove unused components. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4708. Add binaries missed in the initial checkin for Chukwa. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4805. Remove black list collector from Chukwa Agent HTTP Sender.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4837. Move HADOOP_CONF_DIR configuration to chukwa-env.sh (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-4825. Use ps instead of jps for querying process status in Chukwa.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4844. Fixed javadoc for
+    org.apache.hadoop.fs.permission.AccessControlException to document that
+    it's deprecated in favour of
+    org.apache.hadoop.security.AccessControlException. (acmurthy) 
+
+    HADOOP-4706. Close the underlying output stream in
+    IFileOutputStream::close. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4855. Fixed command-specific help messages for refreshServiceAcl in
+    DFSAdmin and MRAdmin. (acmurthy)
+
+    HADOOP-4820. Remove unused method FSNamesystem::deleteInSafeMode. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4698. Lower io.sort.mb to 10 in the tests and raise the junit memory
+    limit to 512m from 256m. (Nigel Daley via cdouglas)
+
+    HADOOP-4860. Split TestFileTailingAdapters into three separate tests to
+    avoid contention. (Eric Yang via cdouglas)
+
+    HADOOP-3921. Fixed clover (code coverage) target to work with JDK 6.
+    (tomwhite via nigel)
+
+    HADOOP-4845. Modify the reduce input byte counter to record only the
+    compressed size and add a human-readable label. (Yongqiang He via cdouglas)
+
+    HADOOP-4458. Add a test creating symlinks in the working directory.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4879. Fix org.apache.hadoop.mapred.Counters to correctly define
+    Object.equals rather than depend on contentEquals api. (omalley via 
+    acmurthy)
+
+    HADOOP-4791. Fix rpm build process for Chukwa. (Eric Yang via cdouglas)
+
+    HADOOP-4771. Correct initialization of the file count for directories 
+    with quotas. (Ruyue Ma via shv)
+
+    HADOOP-4878. Fix eclipse plugin classpath file to point to ivy's resolved
+    lib directory and added the same to test-patch.sh. (Giridharan Kesavan via
+    acmurthy)
+
+    HADOOP-4774. Fix default values of some capacity scheduler configuration
+    items which would otherwise not work on a fresh checkout.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4876. Fix capacity scheduler reclamation by updating count of
+    pending tasks correctly. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4849. Documentation for Service Level Authorization implemented in
+    HADOOP-4348. (acmurthy)
+
+    HADOOP-4827. Replace Consolidator with Aggregator macros in Chukwa (Eric
+    Yang via cdouglas)
+
+    HADOOP-4894. Correctly parse ps output in Chukwa jettyCollector.sh. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-4892. Close fds out of Chukwa ExecPlugin. (Ari Rabkin via cdouglas)
+
+    HADOOP-4889. Fix permissions in RPM packaging. (Eric Yang via cdouglas)
+
+    HADOOP-4869. Fixes the TT-JT heartbeat to have an explicit flag for 
+    restart, apart from the initialContact flag that existed earlier.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4716. Fixes ReduceTask.java to clear out the mapping between
+    hosts and MapOutputLocation upon a JT restart (Amar Kamat via ddas)
+
+    HADOOP-4880. Removes an unnecessary testcase from TestJobTrackerRestart.
+    (Amar Kamat via ddas)
+
+    HADOOP-4924. Fixes a race condition in TaskTracker re-init. (ddas)
+
+    HADOOP-4854. Read reclaim capacity interval from capacity scheduler 
+    configuration. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4896. HDFS Fsck does not load HDFS configuration. (Raghu Angadi)
+
+    HADOOP-4956. Creates TaskStatus for failed tasks with an empty Counters 
+    object instead of null. (ddas)
+
+    HADOOP-4979. Fix capacity scheduler to block cluster for failed high
+    RAM requirements across task types. (Vivek Ratan via yhemanth)
+
+    HADOOP-4949. Fix native compilation. (Chris Douglas via acmurthy) 
+
+    HADOOP-4787. Fixes the testcase TestTrackerBlacklistAcrossJobs which was
+    earlier failing randomly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4914. Add description fields to Chukwa init.d scripts (Eric Yang via
+    cdouglas)
+
+    HADOOP-4884. Make tool tip date format match standard HICC format. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4925. Make Chukwa sender properties configurable. (Ari Rabkin via
+    cdouglas)
+
+    HADOOP-4947. Make Chukwa command parsing more forgiving of whitespace. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5026. Make chukwa/bin scripts executable in repository. (Andy
+    Konwinski via cdouglas)
+
+    HADOOP-4977. Fix a deadlock between the reclaimCapacity and assignTasks
+    in capacity scheduler. (Vivek Ratan via yhemanth)
+
+    HADOOP-4988. Fix reclaim capacity to work even when there are queues with
+    no capacity. (Vivek Ratan via yhemanth)
+
+    HADOOP-5065. Remove generic parameters from argument to 
+    setIn/OutputFormatClass so that it works with SequenceIn/OutputFormat.
+    (cdouglas via omalley)
+
+    HADOOP-4818. Pass user config to instrumentation API. (Eric Yang via
+    cdouglas)
+
+    HADOOP-4993. Fix Chukwa agent configuration and startup to make it both
+    more modular and testable. (Ari Rabkin via cdouglas)
+
+    HADOOP-5048. Fix capacity scheduler to correctly cleanup jobs that are
+    killed after initialization, but before running. 
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4671. Mark loop control variables shared between threads as
+    volatile. (cdouglas)
+
+    HADOOP-5079. HashFunction inadvertently destroys some randomness
+    (Jonathan Ellis via stack)
+
+    HADOOP-4999. A failure to write to FsEditsLog results in 
+    IndexOutOfBounds exception. (Boris Shkolnik via rangadi)
+
+    HADOOP-5139. Catch IllegalArgumentException during metrics registration 
+    in RPC.  (Hairong Kuang via szetszwo)
+
+    HADOOP-5085. Copying a file to local with Crc throws an exception.
+    (hairong)
+
+    HADOOP-5211. Fix check for job completion in TestSetupAndCleanupFailure.
+    (enis)
+
+    HADOOP-5254. The Configuration class should be able to work with XML
+    parsers that do not support xmlinclude. (Steve Loughran via dhruba)
+
+    HADOOP-4692. Namenode in infinite loop for replicating/deleting corrupt
+    blocks. (hairong)
+
+    HADOOP-5255. Fix use of Math.abs to avoid overflow. (Jonathan Ellis via
+    cdouglas)
+
+    HADOOP-5269. Fixes a problem to do with tasktracker holding on to 
+    FAILED_UNCLEAN or KILLED_UNCLEAN tasks forever. (Amareshwari Sriramadasu
+    via ddas) 
+
+    HADOOP-5214. Fixes a ConcurrentModificationException while the Fairshare
+    Scheduler accesses the tasktrackers stored by the JobTracker.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5233. Addresses the three issues - Race condition in updating
+    status, NPE in TaskTracker task localization when the conf file is missing
+    (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task 
+    (HADOOP-5235). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5247. Introduces a broadcast of KillJobAction to all trackers when
+    a job finishes. This fixes several NPE problems that occur when a 
+    completed job is not in memory and a tasktracker comes to the jobtracker 
+    with a status report of a task belonging to that job. (Amar Kamat via ddas)
+
+    HADOOP-5282. Fixed job history logs for task attempts that are
+    failed by the JobTracker, say due to lost task trackers. (Amar
+    Kamat via yhemanth)
+
+    HADOOP-5241. Fixes a bug in disk-space resource estimation. Makes
+    the estimation formula linear where blowUp =
+    Total-Output/Total-Input. (Sharad Agarwal via ddas)
+
+    HADOOP-5142. Fix MapWritable#putAll to store key/value classes. 
+    (Doğacan Güney via enis)
+
+    HADOOP-4744. Workaround for jetty6 returning -1 when getLocalPort
+    is invoked on the connector. The workaround patch retries a few
+    times before failing.  (Jothi Padmanabhan via yhemanth)
+
+    HADOOP-5280. Adds a check to prevent a task state transition from
+    FAILED to any of UNASSIGNED, RUNNING, COMMIT_PENDING or
+    SUCCEEDED. (ddas)
+
+    HADOOP-5272. Fixes a problem to do with detecting whether an
+    attempt is the first attempt of a Task. This affects JobTracker
+    restart. (Amar Kamat via ddas)
+
+    HADOOP-5306. Fixes a problem to do with logging/parsing the http port of a 
+    lost tracker. Affects JobTracker restart. (Amar Kamat via ddas)
+
+    HADOOP-5111. Fix Job::set* methods to work with generics. (cdouglas)
+
+    HADOOP-5274. Fix gridmix2 dependency on wordcount example. (cdouglas)
+
+    HADOOP-5145. Balancer sometimes runs out of memory after running
+    days or weeks.  (hairong)
+
+    HADOOP-5338. Fix jobtracker restart to clear task completion
+    events cached by tasktrackers forcing them to fetch all events
+    afresh, thus avoiding missed task completion events on the
+    tasktrackers. (Amar Kamat via yhemanth)
+
+    HADOOP-4695. Change TestGlobalFilter so that it allows a web page to be
+    filtered more than once for a single access.  (Kan Zhang via szetszwo) 
+
+    HADOOP-5298. Change TestServletFilter so that it allows a web page to be
+    filtered more than once for a single access.  (szetszwo) 
+
+    HADOOP-5432. Disable ssl during unit tests in hdfsproxy, as it is unused
+    and causes failures. (cdouglas)
+
+    HADOOP-5416. Correct the shell command "fs -test" forrest doc description.
+    (Ravi Phulari via szetszwo) 
+
+    HADOOP-5327. Fixed job tracker to remove files from system directory on
+    ACL check failures and also check ACLs on restart.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-5395. Change the exception message when a job is submitted to an
+    invalid queue. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5276. Fixes a problem to do with updating the start time of
+    a task when the tracker that ran the task is lost. (Amar Kamat via
+    ddas)
+
+    HADOOP-5278. Fixes a problem to do with logging the finish time of
+    a task during recovery (after a JobTracker restart). (Amar Kamat
+    via ddas)
+
+    HADOOP-5490. Fixes a synchronization problem in the
+    EagerTaskInitializationListener class. (Jothi Padmanabhan via
+    ddas)
+
+    HADOOP-5493. The shuffle copier threads return the codecs back to
+    the pool when the shuffle completes. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5414. Fixes IO exception while executing hadoop fs -touchz
+    fileName by making sure that lease renewal thread exits before dfs
+    client exits.  (hairong)
+
+    HADOOP-5103. FileInputFormat now reuses the clusterMap network
+    topology object, which significantly reduces the NetworkTopology.add
+    log messages in the JobClient. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5483. Fixes a problem in the Directory Cleanup Thread due to which
+    TestMiniMRWithDFS sometimes used to fail. (ddas) 
+
+    HADOOP-5281. Prevent sharing incompatible ZlibCompressor instances between
+    GzipCodec and DefaultCodec. (cdouglas)
+
+    HADOOP-5463. Balancer throws "Not a host:port pair" unless port is
+    specified in fs.default.name. (Stuart White via hairong)
+
+    HADOOP-5514. Fix JobTracker metrics and add metrics for waiting, failed
+    tasks. (cdouglas)
+
+    HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread
+    that comes when monitored processes disappear when the thread is
+    running.  (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5382. Support combiners in the new context object API. (omalley)
+
+    HADOOP-5471. Fixes a problem to do with updating the log.index file in the 
+    case where a cleanup task is run. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5328. Fixes a problem in the renaming of job history files during 
+    job recovery. (Amar Kamat via ddas)
+
+    HADOOP-5417. Don't ignore InterruptedExceptions that happen when calling 
+    into rpc. (omalley)
+
+    HADOOP-5320. Add a close() in TestMapReduceLocal.  (Jothi Padmanabhan
+    via szetszwo)
+
+    HADOOP-5520. Fix a typo in disk quota help message.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5519. Remove claims from mapred-default.xml that prime numbers
+    of tasks are helpful.  (Owen O'Malley via szetszwo)
+
+    HADOOP-5484. TestRecoveryManager fails wtih FileAlreadyExistsException.
+    (Amar Kamat via hairong)
+
+    HADOOP-5564. Limit the JVM heap size in the java command for initializing
+    JAVA_PLATFORM.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5565. Add API for failing/finalized jobs to the JT metrics
+    instrumentation. (Jerome Boulon via cdouglas)
+
+    HADOOP-5390. Remove duplicate jars from tarball, src from binary tarball
+    added by hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5066. Building binary tarball should not build docs/javadocs, copy
+    src, or run jdiff. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-5459. Fix undetected CRC errors where intermediate output is closed
+    before it has been completely consumed. (cdouglas)
+
+    HADOOP-5571. Remove widening primitive conversion in TupleWritable mask
+    manipulation. (Jingkei Ly via cdouglas)
+
+    HADOOP-5588. Remove an unnecessary call to listStatus(..) in
+    FileSystem.globStatusInternal(..).  (Hairong Kuang via szetszwo)
+
+    HADOOP-5473. Solves a race condition in killing a task - the state is KILLED
+    if there is a user request pending to kill the task and the TT reported
+    the state as SUCCESS. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5576. Fix LocalRunner to work with the new context object API in
+    mapreduce. (Tom White via omalley)
+
+    HADOOP-4374. Installs a shutdown hook in the Task JVM so that log.index is
+    updated before the JVM exits. Also makes the update to log.index atomic.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5577. Add a verbose flag to mapreduce.Job.waitForCompletion to get
+    the running job's information printed to the user's stdout as it runs.
+    (omalley)
+
+    HADOOP-5607. Fix NPE in TestCapacityScheduler. (cdouglas)
+
+    HADOOP-5605. All the replicas incorrectly got marked as corrupt. (hairong)
+
+    HADOOP-5337. JobTracker, upon restart, now waits for the TaskTrackers to
+    join back before scheduling new tasks. This fixes race conditions associated
+    with greedy scheduling as was the case earlier. (Amar Kamat via ddas) 
+
+    HADOOP-5227. Fix distcp so -update and -delete can be meaningfully
+    combined. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5305. Increase number of files and print debug messages in
+    TestCopyFiles.  (szetszwo)
+
+    HADOOP-5548. Add synchronization for JobTracker methods in RecoveryManager.
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-3810. NameNode seems unstable on a cluster with little space left.
+    (hairong)
+
+    HADOOP-5068. Fix NPE in TestCapacityScheduler.  (Vinod Kumar Vavilapalli
+    via szetszwo)
+
+    HADOOP-5585. Clear FileSystem statistics between tasks when jvm-reuse
+    is enabled. (omalley)
+
+    HADOOP-5394. JobTracker might schedule 2 attempts of the same task 
+    with the same attempt id across restarts. (Amar Kamat via sharad)
+
+    HADOOP-5645. After HADOOP-4920 we need a place to checkin
+    releasenotes.html. (nigel)
+
+Release 0.19.2 - 2009-06-30
+
+  BUG FIXES
+
+    HADOOP-5154. Fixes a deadlock in the fairshare scheduler. 
+    (Matei Zaharia via yhemanth)
+   
+    HADOOP-5146. Fixes a race condition that causes LocalDirAllocator to miss
+    files.  (Devaraj Das via yhemanth)
+
+    HADOOP-4638. Fixes job recovery to not crash the job tracker for problems
+    with a single job file. (Amar Kamat via yhemanth)
+
+    HADOOP-5384. Fix a problem that DataNodeCluster creates blocks with
+    generationStamp == 1.  (szetszwo)
+
+    HADOOP-5376. Fixes the code handling lost tasktrackers to set the task state
+    to KILLED_UNCLEAN only for the relevant types of tasks.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-5285. Fixes these issues: (1) obtainTaskCleanupTask checks whether the
+    job is initialized before trying to lock the JobInProgress; (2) Moves the
+    CleanupQueue class outside the TaskTracker and makes it a generic class that
+    is used by the JobTracker also for deleting the paths on the job's output fs;
+    (3) Moves the references to completedJobStore outside the block where the
+    JobTracker is locked. (ddas)
+
+    HADOOP-5392. Fixes a problem to do with JT crashing during recovery when
+    the job files are garbled. (Amar Kamat via ddas)
+
+    HADOOP-5332. Appending to files is not allowed (by default) unless
+    dfs.support.append is set to true. (dhruba)
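+
+    For example, a minimal hdfs-site.xml sketch enabling the behaviour
+    described above (illustrative only):
+
+      <property>
+        <name>dfs.support.append</name>
+        <value>true</value>
+      </property>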
+
+    HADOOP-5333. libhdfs supports appending to files. (dhruba)
+
+    HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba)
+
+    HADOOP-5440. Fixes a problem to do with removing a taskId from the list
+    of taskIds that the TaskTracker's TaskMemoryManager manages.
+    (Amareshwari Sriramadasu via ddas)
+ 
+    HADOOP-5446. Restore TaskTracker metrics. (cdouglas)
+
+    HADOOP-5449. Fixes the history cleaner thread. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5479. NameNode should not send empty block replication request to
+    DataNode. (hairong)
+
+    HADOOP-5259. Job with output hdfs:/user/<username>/outputpath (no 
+    authority) fails with Wrong FS. (Doug Cutting via hairong)
+
+    HADOOP-5522. Documents the setup/cleanup tasks in the mapred tutorial.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5549. ReplicationMonitor should schedule both replication and
+    deletion work in one iteration. (hairong)
+
+    HADOOP-5554. DataNodeCluster and CreateEditsLog should create blocks with
+    the same generation stamp value. (hairong via szetszwo)
+
+    HADOOP-5231. Clones the TaskStatus before passing it to the JobInProgress.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4719. Fix documentation of 'ls' format for FsShell. (Ravi Phulari
+    via cdouglas)
+
+    HADOOP-5374. Fixes an NPE problem in the getTasksToSave method.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4780. Cache the size of directories in DistributedCache, avoiding
+    long delays in recalculating it. (He Yongqiang via cdouglas)
+
+    HADOOP-5551. Prevent directory destruction on file create.
+    (Brian Bockelman via shv)
+
+    HADOOP-5671. Fix FNF exceptions when copying from old versions of
+    HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5213. Fix NullPointerException caused when bzip2 compression 
+    was used and a user closed an output stream without writing any data.
+    (Zheng Shao via dhruba)
+
+    HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
+    conditions. (Brian Bockelman via cdouglas)
+
+    HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
+    ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
+
+    HADOOP-5951. Add Apache license header to StorageInfo.java.  (Suresh
+    Srinivas via szetszwo)
+
+Release 0.19.1 - 2009-02-23 
+
+  IMPROVEMENTS
+
+    HADOOP-4739. Fix spelling and grammar, improve phrasing of some sections in
+    mapred tutorial. (Vivek Ratan via cdouglas)
+
+    HADOOP-3894. DFSClient logging improvements. (Steve Loughran via shv)
+
+    HADOOP-5126. Remove empty file BlocksWithLocations.java (shv)
+
+    HADOOP-5127. Remove public methods in FSDirectory. (Jakob Homan via shv)
+
+  BUG FIXES
+
+    HADOOP-4697. Fix getBlockLocations in KosmosFileSystem to handle multiple
+    blocks correctly. (Sriram Rao via cdouglas)
+
+    HADOOP-4420. Add null checks for job, caused by invalid job IDs.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4632. Fix TestJobHistoryVersion to use test.build.dir instead of the
+    current working directory for scratch space. (Amar Kamat via cdouglas)
+
+    HADOOP-4508. Fix FSDataOutputStream.getPos() for append. (dhruba via
+    szetszwo)
+
+    HADOOP-4727. Fix a group checking bug in fill_stat_structure(...) in
+    fuse-dfs.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4836. Correct typos in mapred related documentation.  (Jordà Polo
+    via szetszwo)
+
+    HADOOP-4821. Usage descriptions in the Quotas guide documentation are
+    incorrect. (Boris Shkolnik via hairong)
+
+    HADOOP-4847. Moves the loading of OutputCommitter to the Task.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4966. Marks completed setup tasks for removal. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4982. TestFsck should run in Eclipse. (shv)
+
+    HADOOP-5008. TestReplication#testPendingReplicationRetry leaves an opened
+    fd unclosed. (hairong)
+
+    HADOOP-4906. Fix TaskTracker OOM by keeping a shallow copy of JobConf in
+    TaskTracker.TaskInProgress. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4918. Fix bzip2 compression to work with Sequence Files.
+    (Zheng Shao via dhruba).
+
+    HADOOP-4965. TestFileAppend3 should close FileSystem. (shv)
+
+    HADOOP-4967. Fixes a race condition in the JvmManager to do with killing
+    tasks. (ddas)
+
+    HADOOP-5009. DataNode#shutdown sometimes leaves data block scanner
+    verification log unclosed. (hairong)
+
+    HADOOP-5086. Use the appropriate FileSystem for trash URIs. (cdouglas)
+    
+    HADOOP-4955. Make DBOutputFormat use column names from setOutput().
+    (Kevin Peterson via enis) 
+
+    HADOOP-4862. Minor: HADOOP-3678 did not remove all the cases of 
+    spurious IOExceptions logged by DataNode. (Raghu Angadi) 
+
+    HADOOP-5034. NameNode should send both replication and deletion requests
+    to DataNode in one reply to a heartbeat. (hairong)
+
+    HADOOP-4759. Removes the temporary output directory for failed and killed
+    tasks by launching special CLEANUP tasks for them.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5161. Accepted sockets do not get placed in
+    DataXceiverServer#childSockets. (hairong)
+
+    HADOOP-5193. Correct calculation of edits modification time. (shv)
+
+    HADOOP-4494. Allow libhdfs to append to files.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-5166. Fix JobTracker restart to work when ACLs are configured
+    for the JobTracker. (Amar Kamat via yhemanth).
+
+    HADOOP-5067. Fixes TaskInProgress.java to keep track of count of failed and
+    killed tasks correctly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4760. HDFS streams should not throw exceptions when closed twice. 
+    (enis)
+
+Release 0.19.0 - 2008-11-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3595. Remove deprecated methods for mapred.combine.once 
+    functionality, which were necessary to provide backwards-compatible
+    combiner semantics for 0.18. (cdouglas via omalley)
+
+    HADOOP-3667. Remove the following deprecated methods from JobConf:
+      addInputPath(Path)
+      getInputPaths()
+      getMapOutputCompressionType()
+      getOutputPath()
+      getSystemDir()
+      setInputPath(Path)
+      setMapOutputCompressionType(CompressionType style)
+      setOutputPath(Path)
+    (Amareshwari Sriramadasu via omalley)
+
+    HADOOP-3652. Remove deprecated class OutputFormatBase.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-2885. Break the hadoop.dfs package into separate packages under
+    hadoop.hdfs that reflect whether they are client, server, protocol, 
+    etc. DistributedFileSystem and DFSClient have moved and are now 
+    considered package private. (Sanjay Radia via omalley)
+
+    HADOOP-2325.  Require Java 6. (cutting)
+
+    HADOOP-372.  Add support for multiple input paths with a different
+    InputFormat and Mapper for each path.  (Chris Smith via tomwhite)
+
+    HADOOP-1700.  Support appending to file in HDFS. (dhruba)
+
+    HADOOP-3792. Make FsShell -test consistent with unix semantics, returning
+    zero for true and non-zero for false. (Ben Slusky via cdouglas)
+
+    HADOOP-3664. Remove the deprecated method InputFormat.validateInput,
+    which is no longer needed. (tomwhite via omalley)
+
+    HADOOP-3549. Give more meaningful errno's in libhdfs. In particular, 
+    EACCES is returned for permission problems. (Ben Slusky via omalley)
+
+    HADOOP-4036. ResourceStatus was added to TaskTrackerStatus by HADOOP-3759,
+    so increment the InterTrackerProtocol version. (Hemanth Yamijala via 
+    omalley)
+
+    HADOOP-3150. Moves task promotion to tasks. Defines a new interface for
+    committing output files. Moves job setup to jobclient, and moves jobcleanup
+    to a separate task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3446. Keep map outputs in memory during the reduce. Remove
+    fs.inmemory.size.mb and replace with properties defining in memory map
+    output retention during the shuffle and reduce relative to maximum heap
+    usage. (cdouglas)
+
+    HADOOP-3245. Adds the feature for supporting JobTracker restart. Running
+    jobs can be recovered from the history file. The history file format has
+    been modified to support recovery. The task attempt ID now has the 
+    JobTracker start time to distinguish attempts of the same TIP across 
+    restarts. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-4007. Remove DFSFileInfo - FileStatus is sufficient. 
+    (Sanjay Radia via hairong)
+
+    HADOOP-3722. Fixed Hadoop Streaming and Hadoop Pipes to use the Tool
+    interface and GenericOptionsParser. (Enis Soztutar via acmurthy) 
+
+    HADOOP-2816. Cluster summary at name node web reports the space
+    utilization as:
+    Configured Capacity: capacity of all the data directories - Reserved space
+    Present Capacity: Space available for dfs, i.e. remaining + used space
+    DFS Used%: DFS used space / Present Capacity
+    (Suresh Srinivas via hairong)
+
+    HADOOP-3938. Disk space quotas for HDFS. This is similar to namespace
+    quotas in 0.18. (rangadi)
+
+    HADOOP-4293. Make Configuration Writable and remove unreleased 
+    WritableJobConf. Configuration.write is renamed to writeXml. (omalley)
+
+    HADOOP-4281. Change dfsadmin to report available disk space in a format
+    consistent with the web interface as defined in HADOOP-2816. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4430. Further change the cluster summary at name node web that was
+    changed in HADOOP-2816:
+      Non DFS Used - This indicates the disk space taken by non-DFS files out
+                     of the Configured Capacity
+      DFS Used % - DFS Used % of Configured Capacity 
+      DFS Remaining % - Remaining % of Configured Capacity available for DFS use
+    DFS command line report reflects the same change. Config parameter 
+    dfs.datanode.du.pct is no longer used and is removed from the 
+    hadoop-default.xml. (Suresh Srinivas via hairong)
+
+    HADOOP-4116. Balancer should provide better resource management. (hairong)
+
+    HADOOP-4599. BlocksMap and BlockInfo made package private. (shv)
+
+  NEW FEATURES
+
+    HADOOP-3341. Allow streaming jobs to specify the field separator for map
+    and reduce input and output. The new configuration values are:
+      stream.map.input.field.separator
+      stream.map.output.field.separator
+      stream.reduce.input.field.separator
+      stream.reduce.output.field.separator
+    All of them default to "\t". (Zheng Shao via omalley)
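+
+    As an illustrative sketch only, a streaming job could override one of
+    these separators in its configuration, e.g.:
+
+      <property>
+        <name>stream.map.output.field.separator</name>
+        <value>,</value>
+      </property>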
+
+    HADOOP-3479. Defines the configuration file for the resource manager in 
+    Hadoop. You can configure various parameters related to scheduling, such 
+    as queues and queue properties, here. The properties for a queue follow a
+    naming convention such as hadoop.rm.queue.queue-name.property-name.
+    (Hemanth Yamijala via ddas)
+
+    HADOOP-3149. Adds a way in which map/reduce tasks can create multiple 
+    outputs. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3714.  Add a new contrib, bash-tab-completion, which enables 
+    bash tab completion for the bin/hadoop script. See the README file
+    in the contrib directory for the installation. (Chris Smith via enis)
+
+    HADOOP-3730. Adds a new JobConf constructor that disables loading
+    default configurations. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3772. Add a new Hadoop Instrumentation api for the JobTracker and
+    the TaskTracker, refactor Hadoop Metrics as an implementation of the api.
+    (Ari Rabkin via acmurthy) 
+
+    HADOOP-2302. Provides a comparator for numerical sorting of key fields.
+    (ddas)
+
+    HADOOP-153. Provides a way to skip bad records. (Sharad Agarwal via ddas)
+
+    HADOOP-657. Free disk space should be modelled and used by the scheduler
+    to make scheduling decisions. (Ari Rabkin via omalley)
+
+    HADOOP-3719. Initial checkin of Chukwa, which is a data collection and 
+    analysis framework. (Jerome Boulon, Andy Konwinski, Ari Rabkin, 
+    and Eric Yang)
+
+    HADOOP-3873. Add -filelimit and -sizelimit options to distcp to cap the
+    number of files/bytes copied in a particular run to support incremental
+    updates and mirroring. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3585. FailMon package for hardware failure monitoring and 
+    analysis of anomalies. (Ioannis Koltsidas via dhruba)
+
+    HADOOP-1480. Add counters to the C++ Pipes API. (acmurthy via omalley)
+
+    HADOOP-3854. Add support for pluggable servlet filters in the HttpServers.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3759. Provides ability to run memory intensive jobs without 
+    affecting other running tasks on the nodes. (Hemanth Yamijala via ddas)
+
+    HADOOP-3746. Add a fair share scheduler. (Matei Zaharia via omalley)
+
+    HADOOP-3754. Add a thrift interface to access HDFS. (dhruba via omalley)
+
+    HADOOP-3828. Provides a way to write skipped records to DFS.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3948. Separate name-node edits and fsimage directories.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3939. Add an option to DistCp to delete files at the destination
+    not present at the source. (Tsz Wo (Nicholas) Sze via cdouglas)
+
+    HADOOP-3601. Add a new contrib module for Hive, which is a sql-like
+    query processing tool that uses map/reduce. (Ashish Thusoo via omalley)
+
+    HADOOP-3866. Added sort and multi-job updates in the JobTracker web ui.
+    (Craig Weisenfluh via omalley)
+
+    HADOOP-3698. Add access control to control who is allowed to submit or 
+    modify jobs in the JobTracker. (Hemanth Yamijala via omalley)
+
+    HADOOP-1869. Support access times for HDFS files. (dhruba)
+
+    HADOOP-3941. Extend FileSystem API to return file-checksums.
+    (szetszwo)
+
+    HADOOP-3581. Prevents memory intensive user tasks from taking down 
+    nodes. (Vinod K V via ddas)
+
+    HADOOP-3970. Provides a way to recover counters written to JobHistory.
+    (Amar Kamat via ddas)
+
+    HADOOP-3702. Adds ChainMapper and ChainReducer classes that allow composing
+    chains of Maps and Reduces in a single Map/Reduce job, something like 
+    MAP+ / REDUCE MAP*. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3445. Add capacity scheduler that provides guaranteed capacities to 
+    queues as a percentage of the cluster. (Vivek Ratan via omalley)
+
+    HADOOP-3992. Add a synthetic load generation facility to the test
+    directory. (hairong via szetszwo)
+
+    HADOOP-3981. Implement a distributed file checksum algorithm in HDFS
+    and change DistCp to use file checksum for comparing src and dst files
+    (szetszwo)
+
+    HADOOP-3829. Narrow down skipped records based on a user-acceptable value.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3930. Add common interfaces for the pluggable schedulers and the
+    cli & gui clients. (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4176. Implement getFileChecksum(Path) in HftpFileSystem. (szetszwo)
+
+    HADOOP-249. Reuse JVMs across Map-Reduce Tasks. 
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.reuse.jvm.num.tasks
+    (Devaraj Das via acmurthy) 
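+
+    For example, a minimal sketch of enabling unlimited JVM reuse per job in
+    hadoop-site.xml (-1 means reuse without limit; 1 restores the old
+    one-task-per-JVM behaviour):
+
+      <property>
+        <name>mapred.job.reuse.jvm.num.tasks</name>
+        <value>-1</value>
+      </property>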
+
+    HADOOP-4070. Provide a mechanism in Hive for registering UDFs from the
+    query language. (tomwhite)
+
+    HADOOP-2536. Implement a JDBC based database input and output formats to
+    allow Map-Reduce applications to work with databases. (Fredrik Hedberg and
+    Enis Soztutar via acmurthy)
+
+    HADOOP-3019. A new library to support total order partitions.
+    (cdouglas via omalley)
+
+    HADOOP-3924. Added a 'KILLED' job status. (Subramaniam Krishnan via
+    acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-4205. hive: metastore and ql to use the refactored SerDe library.
+    (zshao)
+
+    HADOOP-4106. libhdfs: add time, permission and user attribute support 
+    (part 2). (Pete Wyckoff through zshao)
+
+    HADOOP-4104. libhdfs: add time, permission and user attribute support.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3908. libhdfs: better error message if libhdfs.so doesn't exist.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3732. Delay initialization of datanode block verification till
+    the verification thread is started. (rangadi)
+
+    HADOOP-1627. Various small improvements to 'dfsadmin -report' output.
+    (rangadi)
+
+    HADOOP-3577. Tools to inject blocks into name node and simulated
+    data nodes for testing. (Sanjay Radia via hairong)
+
+    HADOOP-2664. Add a lzop compatible codec, so that files compressed by lzop
+    may be processed by map/reduce. (cdouglas via omalley)
+
+    HADOOP-3655. Add additional ant properties to control junit. (Steve 
+    Loughran via omalley)
+
+    HADOOP-3543. Update the copyright year to 2008. (cdouglas via omalley)
+
+    HADOOP-3587. Add a unit test for the contrib/data_join framework.
+    (cdouglas)
+
+    HADOOP-3402. Add terasort example program (omalley)
+
+    HADOOP-3660. Add replication factor for injecting blocks in simulated
+    datanodes. (Sanjay Radia via cdouglas)
+
+    HADOOP-3684. Add a cloning function to the contrib/data_join framework
+    permitting users to define a more efficient method for cloning values from
+    the reduce than serialization/deserialization. (Runping Qi via cdouglas)
+
+    HADOOP-3478. Improves the handling of map output fetching. Now the
+    randomization is by the hosts (and not the map outputs themselves). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-3617. Removed redundant checks of accounting space in MapTask and
+    made the spill thread persistent so as to avoid creating a new one for
+    each spill. (Chris Douglas via acmurthy)  
+
+    HADOOP-3412. Factor the scheduler out of the JobTracker and make
+    it pluggable. (Tom White and Brice Arnould via omalley)
+
+    HADOOP-3756. Minor. Remove unused dfs.client.buffer.dir from 
+    hadoop-default.xml. (rangadi)
+
+    HADOOP-3747. Adds counter support for MultipleOutputs. 
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-3169. LeaseChecker daemon should not be started in DFSClient
+    constructor. (TszWo (Nicholas), SZE via hairong)
+
+    HADOOP-3824. Move base functionality of StatusHttpServer to a core
+    package. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data
+    may be processed by map/reduce. (Abdul Qadeer via cdouglas)
+
+    HADOOP-3861. MapFile.Reader and Writer should implement Closeable.
+    (tomwhite via omalley)
+
+    HADOOP-3791. Introduce generics into ReflectionUtils. (Chris Smith via
+    cdouglas)
+
+    HADOOP-3694. Improve unit test performance by changing
+    MiniDFSCluster to listen only on 127.0.0.1.  (cutting)
+
+    HADOOP-3620. Namenode should synchronously resolve a datanode's network
+    location when the datanode registers. (hairong)
+
+    HADOOP-3860. NNThroughputBenchmark is extended with rename and delete 
+    benchmarks. (shv)
+    
+    HADOOP-3892. Include unix group name in JobConf. (Matei Zaharia via johan)
+
+    HADOOP-3875. Change the time period between heartbeats to be relative to
+    the end of the heartbeat rpc, rather than the start. This causes better
+    behavior if the JobTracker is overloaded. (acmurthy via omalley)
+
+    HADOOP-3853. Move multiple input format (HADOOP-372) extension to 
+    library package. (tomwhite via johan)
+
+    HADOOP-9. Use roulette scheduling for temporary space when the size
+    is not known. (Ari Rabkin via omalley)
+
+    HADOOP-3202. Use recursive delete rather than FileUtil.fullyDelete.
+    (Amareshwari Sriramadasu via omalley)
+
+    HADOOP-3368. Remove common-logging.properties from conf. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3851. Fix spelling mistake in FSNamesystemMetrics. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3780. Remove asynchronous resolution of network topology in the 
+    JobTracker (Amar Kamat via omalley)
+
+    HADOOP-3852. Add ShellCommandExecutor.toString method to make nicer
+    error messages. (Steve Loughran via omalley)
+
+    HADOOP-3844. Include message of local exception in RPC client failures.
+    (Steve Loughran via omalley)
+
+    HADOOP-3935. Split out inner classes from DataNode.java. (johan)
+
+    HADOOP-3905. Create generic interfaces for edit log streams. (shv)
+
+    HADOOP-3062. Add metrics to DataNode and TaskTracker to record network
+    traffic for HDFS reads/writes and MR shuffling. (cdouglas)
+
+    HADOOP-3742. Remove HDFS from public java doc and add javadoc-dev for
+    generative javadoc for developers. (Sanjay Radia via omalley)
+
+    HADOOP-3944. Improve documentation for public TupleWritable class in 
+    join package. (Chris Douglas via enis)
+
+    HADOOP-2330. Preallocate HDFS transaction log to improve performance.
+    (dhruba and hairong)
+
+    HADOOP-3965. Convert DataBlockScanner into a package private class. (shv)
+
+    HADOOP-3488. Prevent hadoop-daemon from rsync'ing log files (Stefan 
+    Groshupf and Craig Macdonald via omalley)
+
+    HADOOP-3342. Change the kill task actions to require http post instead of 
+    get to prevent accidental crawls from triggering it. (enis via omalley)
+
+    HADOOP-3937. Limit the job name in the job history filename to 50 
+    characters. (Matei Zaharia via omalley)
+
+    HADOOP-3943. Remove unnecessary synchronization in 
+    NetworkTopology.pseudoSortByDistance. (hairong via omalley)
+
+    HADOOP-3498. File globbing alternation should be able to span path
+    components. (tomwhite)
+
+    HADOOP-3361. Implement renames for NativeS3FileSystem.
+    (Albert Chern via tomwhite)
+
+    HADOOP-3605. Make EC2 scripts show an error message if AWS_ACCOUNT_ID is
+    unset. (Al Hoang via tomwhite)
+
+    HADOOP-4147. Remove unused class JobWithTaskContext from class
+    JobInProgress. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4151. Add a byte-comparable interface that both Text and 
+    BytesWritable implement. (cdouglas via omalley)
+
+    HADOOP-4174. Move fs image/edit log methods from ClientProtocol to
+    NamenodeProtocol. (shv via szetszwo)
+
+    HADOOP-4181. Include a .gitignore and saveVersion.sh change to support
+    developing under git. (omalley)
+
+    HADOOP-4186. Factor LineReader out of LineRecordReader. (tomwhite via
+    omalley)
+
+    HADOOP-4184. Break the module dependencies between core, hdfs, and 
+    mapred. (tomwhite via omalley)
+
+    HADOOP-4075. test-patch.sh now spits out ant commands that it runs.
+    (Ramya R via nigel)
+
+    HADOOP-4117. Improve configurability of Hadoop EC2 instances.
+    (tomwhite)
+
+    HADOOP-2411. Add support for larger CPU EC2 instance types.
+    (Chris K Wensel via tomwhite)
+
+    HADOOP-4083. Changed the configuration attribute queue.name to
+    mapred.job.queue.name. (Hemanth Yamijala via acmurthy) 
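+
+    For example, a job submission could select a queue with the renamed
+    property ("research" below is a hypothetical queue name):
+
+      <property>
+        <name>mapred.job.queue.name</name>
+        <value>research</value>
+      </property>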
+
+    HADOOP-4194. Added the JobConf and JobID to job-related methods in
+    JobTrackerInstrumentation for better metrics. (Mac Yang via acmurthy) 
+
+    HADOOP-3975. Change test-patch script to report working dir
+    modifications preventing the suite from being run. (Ramya R via cdouglas)
+
+    HADOOP-4124. Added a command-line switch to allow users to set job
+    priorities, also allow it to be manipulated via the web-ui. (Hemanth
+    Yamijala via acmurthy) 
+
+    HADOOP-2165. Augmented JobHistory to include the URIs to the tasks'
+    userlogs. (Vinod Kumar Vavilapalli via acmurthy) 
+
+    HADOOP-4062. Remove the synchronization on the output stream when a
+    connection is closed and also remove an undesirable exception when
+    a client is stopped while there is no pending RPC request. (hairong)
+
+    HADOOP-4227. Remove the deprecated class org.apache.hadoop.fs.ShellCommand.
+    (szetszwo)
+
+    HADOOP-4006. Clean up FSConstants and move some of the constants to
+    better places. (Sanjay Radia via rangadi)
+
+    HADOOP-4279. Trace the seeds of random sequences in append unit tests to
+    make intermittent failures reproducible. (szetszwo via cdouglas)
+
+    HADOOP-4209. Remove the change to the format of task attempt id by 
+    incrementing the task attempt numbers by 1000 when the job restarts.
+    (Amar Kamat via omalley)
+
+    HADOOP-4301. Adds forrest doc for the skip bad records feature.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4354. Separate TestDatanodeDeath.testDatanodeDeath() into 4 tests.
+    (szetszwo)
+
+    HADOOP-3790. Add more unit tests for testing HDFS file append.  (szetszwo)
+
+    HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth 
+    Yamijala via omalley)
+
+    HADOOP-4424. Change menu layout for Hadoop documentation (Boris Shkolnik
+    via cdouglas).
+
+    HADOOP-4438. Update forrest documentation to include missing FsShell
+    commands. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4105.  Add forrest documentation for libhdfs.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-4510. Make getTaskOutputPath public. (Chris Wensel via omalley)
+
+  OPTIMIZATIONS
+
+    HADOOP-3556. Removed lock contention in MD5Hash by replacing the 
+    singleton MessageDigester with an instance per Thread using 
+    ThreadLocal. (Iván de Prado via omalley)
+
+    HADOOP-3328. When client is writing data to DFS, only the last 
+    datanode in the pipeline needs to verify the checksum. Saves around
+    30% CPU on intermediate datanodes. (rangadi)
+
+    HADOOP-3863. Use a thread-local string encoder rather than a static one
+    that is protected by a lock. (acmurthy via omalley)
+
+    HADOOP-3864. Prevent the JobTracker from locking up when a job is being
+    initialized. (acmurthy via omalley)
+
+    HADOOP-3816. Faster directory listing in KFS. (Sriram Rao via omalley)
+
+    HADOOP-2130. Pipes submit job should have both blocking and non-blocking
+    versions. (acmurthy via omalley)
+
+    HADOOP-3769. Make the SampleMapper and SampleReducer from
+    GenericMRLoadGenerator public, so they can be used in other contexts. 
+    (Lingyun Yang via omalley)
+
+    HADOOP-3514. Inline the CRCs in intermediate files as opposed to reading
+    them from a different .crc file. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3638. Caches the iFile index files in memory to reduce seeks.
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4225. FSEditLog.logOpenFile() should persist accessTime 
+    rather than modificationTime. (shv)
+
+    HADOOP-4380. Made several new classes (Child, JVMId, 
+    JobTrackerInstrumentation, QueueManager, ResourceEstimator, 
+    TaskTrackerInstrumentation, and TaskTrackerMetricsInst) in 
+    org.apache.hadoop.mapred  package private instead of public. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3563.  Refactor the distributed upgrade code so that it is 
+    easier to identify datanode and namenode related code. (dhruba)
+
+    HADOOP-3640. Fix the read method in the NativeS3InputStream. (tomwhite via
+    omalley)
+
+    HADOOP-3711. Fixes the Streaming input parsing to properly find the 
+    separator. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3725. Prevent TestMiniMRMapDebugScript from swallowing exceptions.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3726. Throw exceptions from TestCLI setup and teardown instead of
+    swallowing them. (Steve Loughran via cdouglas)
+
+    HADOOP-3721. Refactor CompositeRecordReader and related mapred.join classes
+    to make them clearer. (cdouglas)
+
+    HADOOP-3720. Re-read the config file when dfsadmin -refreshNodes is invoked
+    so dfs.hosts and dfs.hosts.exclude are observed. (lohit vijayarenu via
+    cdouglas)
+
+    HADOOP-3485. Allow writing to files over fuse.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3723. The flags to the libhdfs.create call can be treated as
+    a bitmask. (Pete Wyckoff via dhruba)
+
+    HADOOP-3643. Filter out completed tasks when asking for running tasks in
+    the JobTracker web/ui. (Amar Kamat via omalley)
+
+    HADOOP-3777. Ensure that Lzo compressors/decompressors correctly handle the
+    case where native libraries aren't available. (Chris Douglas via acmurthy) 
+
+    HADOOP-3728. Fix SleepJob so that it doesn't depend on temporary files,
+    this ensures we can now run more than one instance of SleepJob
+    simultaneously. (Chris Douglas via acmurthy) 
+
+    HADOOP-3795. Fix saving image files on Namenode with different checkpoint
+    stamps. (Lohit Vijayarenu via mahadev)
+   
+    HADOOP-3624. Improve CreateEditsLog to create a tree directory structure.
+    (Lohit Vijayarenu via mahadev)
+
+    HADOOP-3778. DFSInputStream.seek() did not retry in case of some errors.
+    (Luo Ning via rangadi)
+
+    HADOOP-3661. The handling of moving files deleted through fuse-dfs to
+    Trash is made similar to the behaviour of the dfs shell.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3819. Unset LANG and LC_CTYPE in saveVersion.sh to make it
+    compatible with non-English locales. (Rong-En Fan via cdouglas)
+
+    HADOOP-3848. Cache calls to getSystemDir in the TaskTracker instead of
+    calling it for each task start. (acmurthy via omalley)
+
+    HADOOP-3131. Fix reduce progress reporting for compressed intermediate
+    data. (Matei Zaharia via acmurthy) 
+
+    HADOOP-3796. fuse-dfs configuration is implemented as file system
+    mount options. (Pete Wyckoff via dhruba)
+
+    HADOOP-3836. Fix TestMultipleOutputs to correctly clean up. (Alejandro 
+    Abdelnur via acmurthy)
+
+    HADOOP-3805. Improve fuse-dfs write performance.
+    (Pete Wyckoff via zshao)
+
+    HADOOP-3846. Fix unit test CreateEditsLog to generate paths correctly. 
+    (Lohit Vijayarenu via cdouglas)
+    
+    HADOOP-3904. Fix unit tests using the old dfs package name.
+    (TszWo (Nicholas), SZE via johan)
+
+    HADOOP-3319. Fix some HOD error messages to go to stderr instead of
+    stdout. (Vinod Kumar Vavilapalli via omalley)
+
+    HADOOP-3907. Move INodeDirectoryWithQuota to its own .java file.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3919. Fix attribute name in hadoop-default for 
+    mapred.jobtracker.instrumentation. (Ari Rabkin via omalley)
+
+    HADOOP-3903. Change the package name for the servlets to be hdfs instead of
+    dfs. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3773. Change Pipes to set the default map output key and value 
+    types correctly. (Koji Noguchi via omalley)
+
+    HADOOP-3952. Fix compilation error in TestDataJoin referencing dfs package.
+    (omalley)
+
+    HADOOP-3951. Fix package name for FSNamesystem logs and modify other
+    hard-coded Logs to use the class name. (cdouglas)
+
+    HADOOP-3889. Improve error reporting from HftpFileSystem, handling in
+    DistCp. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3946. Fix TestMapRed after hadoop-3664. (tomwhite via omalley)
+
+    HADOOP-3949. Remove duplicate jars from Chukwa. (Jerome Boulon via omalley)
+
+    HADOOP-3933. DataNode sometimes sends up to io.bytes.per.checksum bytes 
+    more than required to the client. (Ning Li via rangadi)
+
+    HADOOP-3962. Shell command "fs -count" should support paths with different
+    file systems. (Tsz Wo (Nicholas), SZE via mahadev)
+
+    HADOOP-3957. Fix javac warnings in DistCp and TestCopyFiles. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-3958. Fix TestMapRed to check the success of test-job. (omalley via
+    acmurthy)
+
+    HADOOP-3985. Fix TestHDFSServerPorts to use random ports.  (Hairong Kuang 
+    via omalley)
+
+    HADOOP-3964. Fix javadoc warnings introduced by FailMon. (dhruba)
+
+    HADOOP-3785. Fix FileSystem cache to be case-insensitive for scheme and
+    authority. (Bill de hOra via cdouglas)
+
+    HADOOP-3506. Fix a rare NPE caused by error handling in S3. (Tom White via
+    cdouglas)
+
+    HADOOP-3705. Fix mapred.join parser to accept InputFormats named with
+    underscore and static, inner classes. (cdouglas)
+
+    HADOOP-4023. Fix javadoc warnings introduced when the HDFS javadoc was 
+    made private. (omalley)
+
+    HADOOP-4030. Remove lzop from the default list of codecs. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3961. Fix task disk space requirement estimates for virtual
+    input jobs. Delays limiting task placement until after 10% of the maps
+    have finished. (Ari Rabkin via omalley)
+
+    HADOOP-2168. Fix problem with C++ record reader's progress not being
+    reported to framework. (acmurthy via omalley)
+
+    HADOOP-3966. Copy findbugs generated output files to PATCH_DIR while 
+    running test-patch. (Ramya R via lohit)
+
+    HADOOP-4037. Fix the eclipse plugin for versions of kfs and log4j. (nigel
+    via omalley)
+
+    HADOOP-3950. Cause the Mini MR cluster to wait for task trackers to 
+    register before continuing. (enis via omalley)
+
+    HADOOP-3910. Remove unused ClusterTestDFSNamespaceLogging and
+    ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3954. Disable record skipping by default. (Sharad Agarwal via
+    cdouglas)
+
+    HADOOP-4050. Fix TestFairScheduler to use absolute paths for the work
+    directory. (Matei Zaharia via omalley)
+
+    HADOOP-4069. Keep temporary test files from TestKosmosFileSystem under
+    test.build.data instead of /tmp. (lohit via omalley)
+ 
+    HADOOP-4078. Create test files for TestKosmosFileSystem in separate
+    directory under test.build.data. (lohit)
+
+    HADOOP-3968. Fix getFileBlockLocations calls to use FileStatus instead
+    of Path reflecting the new API. (Pete Wyckoff via lohit)
+
+    HADOOP-3963. libhdfs does not exit on its own, instead it returns an error 
+    to the caller and behaves as a true library. (Pete Wyckoff via dhruba)
+
+    HADOOP-4100. Removes the cleanupTask scheduling from the Scheduler 
+    implementations and moves it to the JobTracker. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4097. Make hive work well with speculative execution turned on.
+    (Joydeep Sen Sarma via dhruba)
+
+    HADOOP-4113. Changes to libhdfs to not exit on its own, rather return
+    an error code to the caller. (Pete Wyckoff via dhruba)
+
+    HADOOP-4054. Remove duplicate lease removal during edit log loading.
+    (hairong)
+
+    HADOOP-4071. FSNameSystem.isReplicationInProgress should add an
+    underReplicated block to the neededReplication queue using method 
+    "add" not "update". (hairong)
+
+    HADOOP-4154. Fix type warnings in WritableUtils. (szetszwo via omalley)
+
+    HADOOP-4133. Log files generated by Hive should reside in the 
+    build directory. (Prasad Chakka via dhruba)
+
+    HADOOP-4094. Hive now has hive-default.xml and hive-site.xml similar
+    to core hadoop. (Prasad Chakka via dhruba)
+
+    HADOOP-4112. Handles cleanupTask in JobHistory 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3831. Very slow reading clients sometimes failed while reading.
+    (rangadi)
+
+    HADOOP-4155. Use JobTracker's start time while initializing JobHistory's
+    JobTracker Unique String. (lohit) 
+
+    HADOOP-4099. Fix null pointer when using HFTP from an 0.18 server.
+    (dhruba via omalley)
+
+    HADOOP-3570. Includes user-specified libjar files in the client-side 
+    classpath. (Sharad Agarwal via ddas)
+
+    HADOOP-4129. Changed memory limits of TaskTracker and Tasks to be in
+    KiloBytes rather than bytes. (Vinod Kumar Vavilapalli via acmurthy)
+
+    HADOOP-4139. Optimize Hive multi group-by.
+    (Namin Jain via dhruba)
+
+    HADOOP-3911. Add a check to fsck options to make sure -files is not 
+    the first option to resolve conflicts with GenericOptionsParser
+    (lohit)
+
+    HADOOP-3623. Refactor LeaseManager. (szetszwo)
+
+    HADOOP-4125. Handles Reduce cleanup tip on the web ui.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4087. Hive Metastore API for php and python clients.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo)
+
+    HADOOP-4138. Refactor the Hive SerDe library to better structure
+    the interfaces to the serializer and de-serializer.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4195. Close compressor before returning to codec pool.
+    (acmurthy via omalley)
+
+    HADOOP-2403. Escapes some special characters before logging to 
+    history files. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4200. Fix a bug in the test-patch.sh script.
+    (Ramya R via nigel)
+
+    HADOOP-4084. Add explain plan capabilities to Hive Query Language.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4121. Preserve cause for exception if the initialization of
+    HistoryViewer for JobHistory fails. (Amareshwari Sri Ramadasu via
+    acmurthy) 
+
+    HADOOP-4213. Fixes NPE in TestLimitTasksPerJobTaskScheduler.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4077. Setting access and modification time for a file
+    requires write permissions on the file. (dhruba)
+
+    HADOOP-3592. Fix a couple of possible file leaks in FileUtil
+    (Bill de hOra via rangadi)
+
+    HADOOP-4120. Hive interactive shell records the time taken by a 
+    query.  (Raghotham Murthy via dhruba)
+
+    HADOOP-4090. The hive scripts pick up hadoop from HADOOP_HOME
+    and then the path. (Raghotham Murthy via dhruba)
+
+    HADOOP-4242. Remove extra ";" in FSDirectory that blocks compilation
+    in some IDEs. (szetszwo via omalley)
+
+    HADOOP-4249. Fix eclipse path to include the hsqldb.jar. (szetszwo via
+    omalley)
+
+    HADOOP-4247. Move InputSampler into org.apache.hadoop.mapred.lib, so that
+    examples.jar doesn't depend on tools.jar. (omalley)
+
+    HADOOP-4269. Fix the deprecation of LineReader by extending the new class
+    into the old name and deprecating it. Also update the tests to test the 
+    new class. (cdouglas via omalley)
+
+    HADOOP-4280. Fix conversions between seconds in C and milliseconds in 
+    Java for access times for files. (Pete Wyckoff via rangadi)
+
+    HADOOP-4254. -setSpaceQuota command does not convert "TB" extension to
+    terabytes properly. Implementation now uses StringUtils for parsing this.
+    (Raghu Angadi)
+
+    HADOOP-4259. Findbugs should run over tools.jar also. (cdouglas via 
+    omalley)
+
+    HADOOP-4275. Move public method isJobValidName from JobID to a private
+    method in JobTracker. (omalley)
+
+    HADOOP-4173. Fix failures in TestProcfsBasedProcessTree and
+    TestTaskTrackerMemoryManager tests. ProcfsBasedProcessTree and
+    memory management in TaskTracker are disabled on Windows.
+    (Vinod K V via rangadi)
+
+    HADOOP-4189. Fixes the history blocksize & intertracker protocol version
+    issues introduced as part of HADOOP-3245. (Amar Kamat via ddas)
+
+    HADOOP-4190. Fixes the backward compatibility issue with Job History.
+    introduced by HADOOP-3245 and HADOOP-2403. (Amar Kamat via ddas)
+
+    HADOOP-4237. Fixes the TestStreamingBadRecords.testNarrowDown testcase.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4274. Capacity scheduler accidentally modifies the underlying 
+    data structures when browsing the job lists. (Hemanth Yamijala via omalley)
+
+    HADOOP-4309. Fix eclipse-plugin compilation. (cdouglas)
+
+    HADOOP-4232. Fix race condition in JVM reuse when multiple slots become
+    free. (ddas via acmurthy) 
+
+    HADOOP-4302. Fix a race condition in TestReduceFetch that can yield false
+    negatives. (cdouglas)
+
+    HADOOP-3942. Update distcp documentation to include features introduced in
+    HADOOP-3873, HADOOP-3939. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4319. fuse-dfs dfs_read function returns as many bytes as it is
+    told to read unless end-of-file is reached.  (Pete Wyckoff via dhruba)
+
+    HADOOP-4246. Ensure we have the correct lower bound on the number of
+    retries for fetching map-outputs; also fixed the case where the reducer
+    would automatically kill itself when too many unique map-outputs could not
+    be fetched for small jobs. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-4163. Report FSErrors from map output fetch threads instead of
+    merely logging them. (Sharad Agarwal via cdouglas)
+
+    HADOOP-4261. Adds a setup task for jobs. This is required so that we 
+    don't setup jobs that haven't been inited yet (since init could lead
+    to job failure). Only after the init has successfully happened do we 
+    launch the setupJob task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4256. Removes Completed and Failed Job tables from 
+    jobqueue_details.jsp. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4267. Occasional exceptions during shutting down HSQLDB is logged 
+    but not rethrown. (enis) 
+
+    HADOOP-4018. The number of tasks for a single job cannot exceed a 
+    pre-configured maximum value. (dhruba)
+
+    HADOOP-4288. Fixes a NPE problem in CapacityScheduler. 
+    (Amar Kamat via ddas)
+
+    HADOOP-4014. Create hard links with 'fsutil hardlink' on Windows. (shv)
+
+    HADOOP-4393. Merged org.apache.hadoop.fs.permission.AccessControlException
+    and org.apache.hadoop.security.AccessControlIOException into a single
+    class hadoop.security.AccessControlException. (omalley via acmurthy)
+
+    HADOOP-4287. Fixes an issue to do with maintaining counts of running/pending
+    maps/reduces. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4361. Makes sure that jobs killed from command line are killed
+    fast (i.e., there is a slot to run the cleanup task soon).
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4400. Add "hdfs://" to fs.default.name on quickstart.html.
+    (Jeff Hammerbacher via omalley)
+
+    HADOOP-4378. Fix TestJobQueueInformation to use SleepJob rather than
+    WordCount via TestMiniMRWithDFS. (Sreekanth Ramakrishnan via acmurthy) 
+
+    HADOOP-4376. Fix formatting in hadoop-default.xml for
+    hadoop.http.filter.initializers. (Enis Soztutar via acmurthy) 
+
+    HADOOP-4410. Adds an extra arg to the API FileUtil.makeShellPath to
+    determine whether to canonicalize file paths or not.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4236. Ensure un-initialized jobs are killed correctly on
+    user-demand. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4373. Fix calculation of Guaranteed Capacity for the
+    capacity-scheduler. (Hemanth Yamijala via acmurthy) 
+
+    HADOOP-4053. Schedulers must be notified when jobs complete. (Amar Kamat via omalley)
+
+    HADOOP-4335. Fix FsShell -ls for filesystems without owners/groups. (David
+    Phillips via cdouglas)
+
+    HADOOP-4426. TestCapacityScheduler broke due to the two commits HADOOP-4053
+    and HADOOP-4373. This patch fixes that. (Hemanth Yamijala via ddas)
+
+    HADOOP-4418. Updates documentation in forrest for Mapred, streaming and pipes.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3155. Ensure that there is only one thread fetching 
+    TaskCompletionEvents on TaskTracker re-init. (Dhruba Borthakur via
+    acmurthy) 
+
+    HADOOP-4425. Fix EditLogInputStream to overload the bulk read method.
+    (cdouglas)
+
+    HADOOP-4427. Adds the new queue/job commands to the manual.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4278. Increase debug logging for unit test TestDatanodeDeath.
+    Fix the case when primary is dead.  (dhruba via szetszwo)
+
+    HADOOP-4423. Keep block length when the block recovery is triggered by
+    append.  (szetszwo)
+
+    HADOOP-4449. Fix dfsadmin usage. (Raghu Angadi via cdouglas)
+
+    HADOOP-4455. Added TestSerDe so that unit tests can run successfully.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4457. Fixes an input split logging problem introduced by
+    HADOOP-3245. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4404. saveFSImage() removes files from a storage directory that do 
+    not correspond to its type. (shv)
+
+    HADOOP-4149. Fix handling of updates to the job priority, by changing the
+    list of jobs to be keyed by the priority, submit time, and job tracker id.
+    (Amar Kamat via omalley)
+
+    HADOOP-4296. Fix job client failures by not retiring a job as soon as it
+    is finished. (dhruba)
+
+    HADOOP-4439. Remove configuration variables that aren't usable yet, in
+    particular mapred.tasktracker.tasks.maxmemory and mapred.task.max.memory.
+    (Hemanth Yamijala via omalley)
+
+    HADOOP-4230. Fix for serde2 interface, limit operator, select * operator,
+    UDF trim functions and sampling. (Ashish Thusoo via dhruba)
+
+    HADOOP-4358. No need to truncate access time in INode. Also fixes NPE 
+    in CreateEditsLog. (Raghu Angadi) 
+
+    HADOOP-4387. TestHDFSFileSystemContract fails on windows nightly builds.
+    (Raghu Angadi)
+
+    HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables
+    and can be used with other Serialization frameworks. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4525. Fix ipc.server.ipcnodelay originally missed in HADOOP-2232.
+    (cdouglas via Clint Morgan)
+
+    HADOOP-4498. Ensure that JobHistory correctly escapes the job name so that
+    regex patterns work. (Chris Wensel via acmurthy)
+
+    HADOOP-4446. Modify guaranteed capacity labels in capacity scheduler's UI
+    to reflect the information being displayed. (Sreekanth Ramakrishnan via 
+    yhemanth)
+
+    HADOOP-4282. Some user facing URLs are not filtered by user filters.
+    (szetszwo)
+
+    HADOOP-4595. Fixes two race conditions - one to do with updating free slot count,
+    and another to do with starting the MapEventsFetcher thread. (ddas)
+
+    HADOOP-4552. Fix a deadlock in RPC server. (Raghu Angadi)
+
+    HADOOP-4471. Sort running jobs by priority in the capacity scheduler.
+    (Amar Kamat via yhemanth) 
+
+    HADOOP-4500. Fix MultiFileSplit to get the FileSystem from the relevant
+    path rather than the JobClient. (Joydeep Sen Sarma via cdouglas)
+
+Release 0.18.4 - Unreleased
+
+  BUG FIXES
+
+    HADOOP-5114. Remove timeout for accept() in DataNode. This makes accept() 
+    fail in JDK on Windows and causes many tests to fail. (Raghu Angadi)
+
+    HADOOP-5192. Block receiver should not remove a block that's created or
+    being written by other threads. (hairong)
+ 
+    HADOOP-5134. FSNamesystem#commitBlockSynchronization adds under-construction
+    block locations to blocksMap. (Dhruba Borthakur via hairong)
+
+    HADOOP-5412. Simulated DataNode should not write to a block that's being
+    written by another thread. (hairong)
+
+    HADOOP-5465. Fix the problem of blocks remaining under-replicated by
+    providing synchronized modification to the counter xmitsInProgress in
+    DataNode. (hairong)
+
+    HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
+    (szetszwo)
+
+    HADOOP-5644. Namenode is stuck in safe mode. (Suresh Srinivas via hairong)
+
+    HADOOP-6017. Lease Manager in NameNode does not handle certain characters
+    in filenames. This results in fatal errors in Secondary NameNode and while
+    restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
+
+Release 0.18.3 - 2009-01-27
+
+  IMPROVEMENTS
+
+    HADOOP-4150. Include librecordio in hadoop releases. (Giridharan Kesavan
+    via acmurthy)
+
+    HADOOP-4668. Improve documentation for setCombinerClass to clarify the
+    restrictions on combiners. (omalley)
+
+  BUG FIXES
+
+    HADOOP-4499. DFSClient should invoke checksumOk only once. (Raghu Angadi)
+
+    HADOOP-4597. Calculate mis-replicated blocks when safe-mode is turned
+    off manually. (shv)
+
+    HADOOP-3121. lsr should keep listing the remaining items but not
+    terminate if there is any IOException. (szetszwo)
+
+    HADOOP-4610. Always calculate mis-replicated blocks when safe-mode is 
+    turned off. (shv)
+
+    HADOOP-3883. Limit namenode to assign at most one generation stamp for
+    a particular block within a short period. (szetszwo)
+
+    HADOOP-4556. Block went missing. (hairong)
+
+    HADOOP-4643. NameNode should exclude excessive replicas when counting
+    live replicas for a block. (hairong)
+
+    HADOOP-4703. Should not wait for proxy forever in lease recovering.
+    (szetszwo)
+
+    HADOOP-4647. NamenodeFsck should close the DFSClient it has created.
+    (szetszwo)
+
+    HADOOP-4616. Fuse-dfs can handle bad values from FileSystem.read call.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4061. Throttle Datanode decommission monitoring in Namenode.
+    (szetszwo)
+
+    HADOOP-4659. Root cause of connection failure is being lost to code that
+    uses it for delaying startup. (Steve Loughran and Hairong via hairong)
+
+    HADOOP-4614. Lazily open segments when merging map spills to avoid using
+    too many file descriptors. (Yuri Pradkin via cdouglas)
+
+    HADOOP-4257. The DFS client should pick only one datanode as the candidate
+    to initiate lease recovery.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-4713. Fix librecordio to handle records larger than 64k. (Christian
+    Kunz via cdouglas)
+
+    HADOOP-4635. Fix a memory leak in fuse dfs. (pete wyckoff via mahadev)
+
+    HADOOP-4714. Report status between merges and make the number of records
+    between progress reports configurable. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4726. Fix documentation typos "the the". (Edward J. Yoon via
+    szetszwo)
+
+    HADOOP-4679. Datanode prints tons of log messages: waiting for threadgroup
+    to exit, active threads is XX. (hairong)
+
+    HADOOP-4746. Job output directory should be normalized. (hairong)
+
+    HADOOP-4717. Removal of default port# in NameNode.getUri() causes a
+    map/reduce job failed to prompt temporary output. (hairong)
+
+    HADOOP-4778. Check for zero size block meta file when updating a block.
+    (szetszwo)
+
+    HADOOP-4742. Replica gets deleted by mistake. (Wang Xu via hairong)
+
+    HADOOP-4702. Failed block replication leaves an incomplete block in
+    receiver's tmp data directory. (hairong)
+
+    HADOOP-4613. Fix block browsing on Web UI. (Johan Oskarsson via shv)
+
+    HADOOP-4806. HDFS rename should not use src path as a regular expression.
+    (szetszwo)
+
+    HADOOP-4795. Prevent lease monitor getting into an infinite loop when
+    leases and the namespace tree does not match. (szetszwo)
+
+    HADOOP-4620. Fixes Streaming to handle well the cases of map/reduce with empty
+    input/output. (Ravi Gummadi via ddas)
+
+    HADOOP-4857. Fixes TestUlimit to have exactly 1 map in the jobs spawned.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-4810. Data lost at cluster startup time. (hairong)
+
+    HADOOP-4797. Improve how RPC server reads and writes large buffers. Avoids
+    soft-leak of direct buffers and excess copies in NIO layer. (Raghu Angadi)
+
+    HADOOP-4840. TestNodeCount sometimes fails with NullPointerException.
+    (hairong)
+
+    HADOOP-4904. Fix deadlock while leaving safe mode. (shv)
+
+    HADOOP-1980. 'dfsadmin -safemode enter' should prevent the namenode from
+    leaving safemode automatically. (shv & Raghu Angadi)
+
+    HADOOP-4951. Lease monitor should acquire the LeaseManager lock but not the
+    Monitor lock. (szetszwo)
+
+    HADOOP-4935. processMisReplicatedBlocks() should not clear 
+    excessReplicateMap. (shv)
+
+    HADOOP-4961. Fix ConcurrentModificationException in lease recovery 
+    of empty files. (shv)
+
+    HADOOP-4971. A long (unexpected) delay at datanodes could make subsequent
+    block reports from many datanodes arrive at the same time. (Raghu Angadi)
+    
+    HADOOP-4910. NameNode should exclude replicas when choosing excessive
+    replicas to delete to avoid data loss. (hairong)
+
+    HADOOP-4983. Fixes a problem in updating Counters in the status reporting.
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.18.2 - 2008-11-03
+
+  BUG FIXES
+
+    HADOOP-3614. Fix a bug that Datanode may use an old GenerationStamp to get
+    meta file. (szetszwo)
+
+    HADOOP-4314. Simulated datanodes should not include blocks that are still
+    being written in their block report. (Raghu Angadi)
+
+    HADOOP-4228. dfs datanode metrics, bytes_read and bytes_written, overflow
+    due to incorrect type used. (hairong)
+
+    HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER.
+    (szetszwo)
+
+    HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws
+    ArrayIndexOutOfBoundsException. (hairong)
+
+    HADOOP-4403. Make TestLeaseRecovery and TestFileCreation more robust.
+    (szetszwo)
+
+    HADOOP-4292. Do not support append() for LocalFileSystem. (hairong)
+
+    HADOOP-4399. Make fuse-dfs multi-thread access safe.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4369. Use setMetric(...) instead of incrMetric(...) for metrics
+    averages.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4469. Rename and add the ant task jar file to the tar file. (nigel)
+
+    HADOOP-3914. DFSClient sends Checksum Ok only once for a block. 
+    (Christian Kunz via hairong)
+ 
+    HADOOP-4467. SerializationFactory now uses the current context ClassLoader
+    allowing for user supplied Serialization instances. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4517. Release FSDataset lock before joining ongoing create threads.
+    (szetszwo)
+ 
+    HADOOP-4526. fsck failing with NullPointerException. (hairong)
+
+    HADOOP-4483. Honor the max parameter in DatanodeDescriptor.getBlockArray(..)
+    (Ahad Rana and Hairong Kuang via szetszwo)
+
+    HADOOP-4340. Correctly set the exit code from JobShell.main so that the
+    'hadoop jar' command returns the right code to the user. (acmurthy)
+
+  NEW FEATURES
+
+    HADOOP-2421.  Add jdiff output to documentation, listing all API
+    changes from the prior release.  (cutting)
+
+Release 0.18.1 - 2008-09-17
+
+  IMPROVEMENTS
+
+    HADOOP-3934. Upgrade log4j to 1.2.15. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3995. In case of quota failure on HDFS, rename does not restore
+    source filename. (rangadi)
+
+    HADOOP-3821. Prevent SequenceFile and IFile from duplicating codecs in
+    CodecPool when closed more than once. (Arun Murthy via cdouglas)
+
+    HADOOP-4040. Remove coded default of the IPC idle connection timeout
+    from the TaskTracker, which was causing HDFS client connections to not be 
+    collected. (ddas via omalley)
+
+    HADOOP-4046. Made WritableComparable's constructor protected instead of 
+    private to re-enable class derivation. (cdouglas via omalley)
+
+    HADOOP-3940. Fix in-memory merge condition to wait when there are no map
+    outputs or when the final map outputs are being fetched without contention.
+    (cdouglas)
+
+Release 0.18.0 - 2008-08-19
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2703.  The default options to fsck skip checking files
+    that are being written to. The output of fsck is incompatible
+    with the previous release. (lohit vijayarenu via dhruba) 
+
+    HADOOP-2865. FsShell.ls() printout format changed to print file names
+    in the end of the line. (Edward J. Yoon via shv)
+
+    HADOOP-3283. The Datanode has a RPC server. It currently supports
+    two RPCs: the first RPC retrieves the metadata about a block and the
+    second RPC sets the generation stamp of an existing block.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2797. Code related to upgrading to 0.14 (Block CRCs) is 
+    removed. As a result, upgrade to 0.18 or later from 0.13 or earlier
+    is not supported. If upgrading from 0.13 or earlier is required,
+    please upgrade to an intermediate version (0.14-0.17) and then
+    to this version. (rangadi)
+
+    HADOOP-544. This issue introduces new classes JobID, TaskID and 
+    TaskAttemptID, which should be used instead of their string counterparts.
+    Functions in JobClient, TaskReport, RunningJob, jobcontrol.Job and 
+    TaskCompletionEvent that use string arguments are deprecated in favor 
+    of the corresponding ones that use ID objects. Applications can use 
+    xxxID.toString() and xxxID.forName() methods to convert/restore objects 
+    to/from strings. (Enis Soztutar via ddas)
+
+    HADOOP-2188. RPC client sends a ping rather than throwing timeouts.
+    RPC server does not throw away old RPCs. If clients and the server are on
+    different versions, they are not able to function well. In addition,
+    the property ipc.client.timeout is removed from the default hadoop
+    configuration. It also removes metrics RpcOpsDiscardedOPsNum. (hairong)
+
+    HADOOP-2181. This issue adds logging for input splits in Jobtracker log 
+    and jobHistory log. Also adds web UI for viewing input splits in job UI 
+    and history UI. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3226. Run combiners multiple times over map outputs as they
+    are merged in both the map and the reduce tasks. (cdouglas via omalley)
+
+    HADOOP-3329.  DatanodeDescriptor objects should not be stored in the
+    fsimage. (dhruba)
+
+    HADOOP-2656.  The Block object has a generation stamp inside it.
+    Existing blocks get a generation stamp of 0. This is needed to support
+    appends. (dhruba)
+
+    HADOOP-3390. Removed deprecated ClientProtocol.abandonFileInProgress().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3405. Made some map/reduce internal classes non-public:
+    MapTaskStatus, ReduceTaskStatus, JobSubmissionProtocol, 
+    CompletedJobStatusStore. (enis via omalley)
+
+    HADOOP-3265. Removed deprecated API getFileCacheHints().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3310. The namenode instructs the primary datanode to do lease
+    recovery. The block gets a new  generation stamp.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2909. Improve IPC idle connection management. Property
+    ipc.client.maxidletime is removed from the default configuration,
+    instead it is defined as twice the ipc.client.connection.maxidletime.
+    A connection with outstanding requests won't be treated as idle.
+    (hairong)
+
+    HADOOP-3459. Change in the output format of dfs -ls to more closely match
+    /bin/ls. New format is: perm repl owner group size date name
+    (Mukund Madhugiri via omalley)
+
+    HADOOP-3113. An fsync invoked on a HDFS file really really
+    persists data! The datanode moves blocks in the tmp directory to 
+    the real block directory on a datanode-restart. (dhruba)
+
+    HADOOP-3452. Change fsck to return non-zero status for a corrupt
+    FileSystem. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3193. Include the address of the client that found the corrupted
+    block in the log. Also include a CorruptedBlocks metric to track the size
+    of the corrupted block map. (cdouglas)
+
+    HADOOP-3512. Separate out the tools into a tools jar. (omalley)
+
+    HADOOP-3598. Ensure that temporary task-output directories are not created
+    if they are not necessary e.g. for Maps with no side-effect files.
+    (acmurthy)
+
+    HADOOP-3665. Modify WritableComparator so that it only creates instances
+    of the keytype if the type does not define a WritableComparator. Calling
+    the superclass compare will throw a NullPointerException. Also define
+    a RawComparator for NullWritable and permit it to be written as a key
+    to SequenceFiles. (cdouglas)
+
+    HADOOP-3673. Avoid deadlock caused by DataNode RPC recoverBlock().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+  NEW FEATURES
+
+    HADOOP-3074. Provides a UrlStreamHandler for DFS and other FS,
+    relying on FileSystem (taton)
+
+    HADOOP-2585. Name-node imports namespace data from a recent checkpoint
+    accessible via a NFS mount. (shv)
+
+    HADOOP-3061. Writable types for doubles and bytes. (Andrzej
+    Bialecki via omalley)
+
+    HADOOP-2857. Allow libhdfs to set jvm options. (Craig Macdonald
+    via omalley)
+
+    HADOOP-3317. Add default port for HDFS namenode.  The port in
+    "hdfs:" URIs now defaults to 8020, so that one may simply use URIs
+    of the form "hdfs://example.com/dir/file". (cutting)
+
+    HADOOP-2019. Adds support for .tar, .tgz and .tar.gz files in 
+    DistributedCache (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3058. Add FSNamesystem status metrics. 
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-1915. Allow users to specify counters via strings instead
+    of enumerations. (tomwhite via omalley)
+
+    HADOOP-2065. Delay invalidating corrupt replicas of a block until it 
+    is removed from the under-replicated state. If all replicas are found to 
+    be corrupt, retain all copies and mark the block as corrupt.
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-3221. Adds org.apache.hadoop.mapred.lib.NLineInputFormat, which 
+    splits files into splits each of N lines. N can be specified by 
+    configuration property "mapred.line.input.format.linespermap", which
+    defaults to 1. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3336. Direct a subset of annotated FSNamesystem calls for audit
+    logging. (cdouglas)
+
+    HADOOP-3400. A new API FileSystem.deleteOnExit() that facilitates
+    handling of temporary files in HDFS. (dhruba)
+
+    HADOOP-4.  Add fuse-dfs to contrib, permitting one to mount an
+    HDFS filesystem on systems that support FUSE, e.g., Linux.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-3246. Add FTPFileSystem.  (Ankur Goel via cutting)
+
+    HADOOP-3250. Extend FileSystem API to allow appending to files.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3177. Implement Syncable interface for FileSystem.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1328. Implement user counters in streaming. (tomwhite via
+    omalley)
+
+    HADOOP-3187. Quotas for namespace management. (Hairong Kuang via ddas)
+
+    HADOOP-3307. Support for Archives in Hadoop. (Mahadev Konar via ddas)
+
+    HADOOP-3460. Add SequenceFileAsBinaryOutputFormat to permit direct
+    writes of serialized data. (Koji Noguchi via cdouglas)
+
+    HADOOP-3230. Add ability to get counter values from command
+    line. (tomwhite via omalley)
+
+    HADOOP-930. Add support for native S3 files.  (tomwhite via cutting)
+
+    HADOOP-3502. Quota API needs documentation in Forrest. (hairong)
+
+    HADOOP-3413. Allow SequenceFile.Reader to use serialization
+    framework. (tomwhite via omalley)
+
+    HADOOP-3541. Import of the namespace from a checkpoint documented 
+    in hadoop user guide. (shv)
+
+  IMPROVEMENTS
+
+    HADOOP-3677. Simplify generation stamp upgrade by making it a 
+    local upgrade on datanodes. Deleted distributed upgrade.
+    (rangadi)
+   
+    HADOOP-2928. Remove deprecated FileSystem.getContentLength().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3130. Make the connect timeout smaller for getFile.
+    (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3160. Remove deprecated exists() from ClientProtocol and 
+    FSNamesystem (Lohit Vjayarenu via rangadi)
+
+    HADOOP-2910. Throttle IPC Clients during bursts of requests or
+    server slowdown. Clients retry connection for up to 15 minutes
+    when socket connection times out. (hairong)
+
+    HADOOP-3295. Allow TextOutputFormat to use configurable separators.
+    (Zheng Shao via cdouglas).
+
+    HADOOP-3308. Improve QuickSort by excluding values equal to the pivot from the
+    partition. (cdouglas)
+
+    HADOOP-2461. Trim property names in configuration.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2799. Deprecate o.a.h.io.Closeable in favor of java.io.Closeable.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3345. Enhance the hudson-test-patch target to cleanup messages,
+    fix minor defects, and add eclipse plugin and python unit tests. (nigel)
+
+    HADOOP-3144. Improve robustness of LineRecordReader by defining a maximum
+    line length (mapred.linerecordreader.maxlength), thereby avoiding reading
+    too far into the following split. (Zheng Shao via cdouglas)
+
+    HADOOP-3334. Move lease handling from FSNamesystem into a separate class.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3332. Reduces the amount of logging in Reducer's shuffle phase.
+    (Devaraj Das)
+
+    HADOOP-3355. Enhances Configuration class to accept hex numbers for getInt
+    and getLong. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3350. Add an argument to distcp to permit the user to limit the
+    number of maps. (cdouglas)
+
+    HADOOP-3013. Add corrupt block reporting to fsck.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3377. Remove TaskRunner::replaceAll and replace with equivalent
+    String::replace. (Brice Arnould via cdouglas)
+
+    HADOOP-3398. Minor improvement to a utility function that participates
+    in backoff calculation. (cdouglas)
+
+    HADOOP-3381. Clear references when directories are deleted so that 
+    the effect of memory leaks is not multiplied. (rangadi)
+
+    HADOOP-2867. Adds the task's CWD to its LD_LIBRARY_PATH. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3232. DU class runs the 'du' command in a separate thread so
+    that it does not block user. DataNode misses heartbeats in large
+    nodes otherwise. (Johan Oskarsson via rangadi)
+
+    HADOOP-3035. During block transfers between datanodes, the receiving
+    datanode can now report corrupt replicas received from the source node to
+    the namenode. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3434. Retain the cause of the bind failure in Server::bind.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3429. Increases the size of the buffers used for the communication
+    for Streaming jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3486. Change default for initial block report to 0 seconds
+    and document it. (Sanjay Radia via omalley)
+
+    HADOOP-3448. Improve the text in the assertion making sure the
+    layout versions are consistent in the data node. (Steve Loughran
+    via omalley)
+
+    HADOOP-2095. Improve the Map-Reduce shuffle/merge by cutting down
+    buffer-copies; changed intermediate sort/merge to use the new IFile format
+    rather than SequenceFiles and compression of map-outputs is now
+    implemented by compressing the entire file rather than SequenceFile
+    compression. Shuffle also has been changed to use a simple byte-buffer
+    manager rather than the InMemoryFileSystem. 
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.map.output.compression.type 
+    (acmurthy)
+
+    HADOOP-236. JobTracker now refuses connection from a task tracker with a 
+    different version number. (Sharad Agarwal via ddas)
+
+    HADOOP-3427. Improves the shuffle scheduler. It now waits for notifications
+    from shuffle threads when it has scheduled enough, before scheduling more.
+    (ddas)
+
+    HADOOP-2393. Moves the handling of dir deletions in the tasktracker to
+    a separate thread. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3501. Deprecate InMemoryFileSystem. (cutting via omalley)
+
+    HADOOP-3366. Stall the shuffle while in-memory merge is in progress.
+    (acmurthy) 
+
+    HADOOP-2916. Refactor src structure, but leave package structure alone.
+    (Raghu Angadi via mukund) 
+
+    HADOOP-3492. Add forrest documentation for user archives.
+    (Mahadev Konar via hairong)
+
+    HADOOP-3467. Improve documentation for FileSystem::deleteOnExit.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3379. Documents stream.non.zero.exit.status.is.failure for Streaming.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3096. Improves documentation about the Task Execution Environment in 
+    the Map-Reduce tutorial. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2984. Add forrest documentation for DistCp. (cdouglas)
+
+    HADOOP-3406. Add forrest documentation for Profiling.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2762. Add forrest documentation for controls of memory limits on 
+    hadoop daemons and Map-Reduce tasks. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3535. Fix documentation and name of IOUtils.close to
+    reflect that it should only be used in cleanup contexts. (omalley)
+
+    HADOOP-3593. Updates the mapred tutorial. (ddas)
+
+    HADOOP-3547. Documents the way in which native libraries can be distributed
+    via the DistributedCache. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3606. Updates the Streaming doc. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3532. Add jdiff reports to the build scripts. (omalley)
+
+    HADOOP-3100. Develop tests to test the DFS command line interface. (mukund)
+
+    HADOOP-3688. Fix up HDFS docs. (Robert Chansler via hairong)
+
+  OPTIMIZATIONS
+
+    HADOOP-3274. The default constructor of BytesWritable creates an empty 
+    byte array. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-3272. Remove redundant copy of Block object in BlocksMap.
+    (Lohit Vjayarenu via shv)
+
+    HADOOP-3164. Reduce DataNode CPU usage by using FileChannel.transferTo().
+    On Linux DataNode takes 5 times less CPU while serving data. Results may
+    vary on other platforms. (rangadi)
+
+    HADOOP-3248. Optimization of saveFSImage. (Dhruba via shv)
+
+    HADOOP-3297. Fetch more task completion events from the job
+    tracker and task tracker. (ddas via omalley)
+
+    HADOOP-3364. Faster image and log edits loading. (shv)
+
+    HADOOP-3369. Fast block processing during name-node startup. (shv)
+
+    HADOOP-1702. Reduce buffer copies when data is written to DFS. 
+    DataNodes take 30% less CPU while writing data. (rangadi)
+
+    HADOOP-3095. Speed up split generation in the FileInputSplit,
+    especially for non-HDFS file systems. Deprecates
+    InputFormat.validateInput. (tomwhite via omalley)
+
+    HADOOP-3552. Add forrest documentation for Hadoop commands.
+    (Sharad Agarwal via cdouglas)
+
+  BUG FIXES
+
+    HADOOP-2905. 'fsck -move' triggers NPE in NameNode. 
+    (Lohit Vjayarenu via rangadi)
+
+    Increment ClientProtocol.versionID missed by HADOOP-2585. (shv)
+
+    HADOOP-3254. Restructure internal namenode methods that process
+    heartbeats to use well-defined BlockCommand object(s) instead of 
+    using the base java Object. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3176.  Change lease record when an open-for-write file 
+    gets renamed. (dhruba)
+
+    HADOOP-3269.  Fix a case when namenode fails to restart
+    while processing a lease record.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3282. Port issues in TestCheckpoint resolved. (shv)
+
+    HADOOP-3268. file:// URLs issue in TestUrlStreamHandler under Windows.
+    (taton)
+
+    HADOOP-3127. Deleting files in trash should really remove them.
+    (Brice Arnould via omalley)
+
+    HADOOP-3300. Fix locking of explicit locks in NetworkTopology.
+    (tomwhite via omalley)
+
+    HADOOP-3270. Constant DatanodeCommands are stored in static final
+    immutable variables for better code clarity.  
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2793. Fix broken links for worst performing shuffle tasks in
+    the job history page. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3313. Avoid unnecessary calls to System.currentTimeMillis
+    in RPC::Invoker. (cdouglas)
+
+    HADOOP-3318. Recognize "Darwin" as an alias for "Mac OS X" to
+    support Soylatte. (Sam Pullara via omalley)
+
+    HADOOP-3301. Fix misleading error message when S3 URI hostname
+    contains an underscore. (tomwhite via omalley)
+
+    HADOOP-3338. Fix Eclipse plugin to compile after HADOOP-544 was
+    committed. Updated all references to use the new JobID representation.
+    (taton via nigel)
+
+    HADOOP-3337. Loading FSEditLog was broken by HADOOP-3283 since it 
+    changed Writable serialization of DatanodeInfo. This patch handles it.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3101. Prevent JobClient from throwing an exception when printing
+    usage. (Edward J. Yoon via cdouglas)
+
+    HADOOP-3119. Update javadoc for Text::getBytes to better describe its
+    behavior. (Tim Nelson via cdouglas)
+
+    HADOOP-2294. Fix documentation in libhdfs to refer to the correct free
+    function. (Craig Macdonald via cdouglas)
+
+    HADOOP-3335. Prevent the libhdfs build from deleting the wrong
+    files on make clean. (cutting via omalley)
+
+    HADOOP-2930. Make {start,stop}-balancer.sh work even if hadoop-daemon.sh
+    is not in the PATH. (Spiros Papadimitriou via hairong)
+
+    HADOOP-3085. Catch Exception in metrics util classes to ensure that
+    misconfigured metrics don't prevent others from updating. (cdouglas)
+
+    HADOOP-3299. CompositeInputFormat should configure the sub-input
+    formats. (cdouglas via omalley)
+
+    HADOOP-3309. Lower io.sort.mb and fs.inmemory.size.mb for MiniMRDFSSort
+    unit test so it passes on Windows. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3348. TestUrlStreamHandler should set URLStreamFactory after
+    DataNodes are initialized. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3371. Ignore InstanceAlreadyExistsException from
+    MBeanUtil::registerMBean. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3349. A file rename was incorrectly changing the name inside a
+    lease record. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor
+    to MergeQueue. (Devaraj Das)
+
+    HADOOP-3388. Fix for TestDatanodeBlockScanner to handle blocks with
+    generation stamps in them.  (dhruba)
+
+    HADOOP-3203. Fixes TaskTracker::localizeJob to pass correct file sizes
+    for the jarfile and the jobfile. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3391. Fix a findbugs warning introduced by HADOOP-3248 (rangadi)
+
+    HADOOP-3393. Fix datanode shutdown to call DataBlockScanner::shutdown and
+    close its log, even if the scanner thread is not running. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3399. A debug message was logged at info level. (rangadi)
+
+    HADOOP-3396. TestDatanodeBlockScanner occasionally fails. 
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3339. Some of the failures on the 3rd datanode in the DFS write pipeline 
+    are not detected properly. This could lead to hard failure of client's
+    write operation. (rangadi)
+
+    HADOOP-3409. Namenode should save the root inode into fsimage. (hairong)
+
+    HADOOP-3296. Fix task cache to work for more than two levels in the cache
+    hierarchy. This also adds a new counter to track cache hits at levels
+    greater than two. (Amar Kamat via cdouglas)
+
+    HADOOP-3375. Lease paths were sometimes not removed from 
+    LeaseManager.sortedLeasesByPath. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3424. Values returned by getPartition should be checked to
+    make sure they are in the range 0 to #reduces - 1 (cdouglas via
+    omalley)
+
+    HADOOP-3408. Change FSNamesystem to send its metrics as integers to
+    accommodate collectors that don't support long values. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3403. Fixes a problem in the JobTracker to do with handling of lost
+    tasktrackers. (Arun Murthy via ddas)
+
+    HADOOP-1318. Completed maps are not failed if the number of reducers is
+    zero. (Amareshwari Sriramadasu via ddas).
+
+    HADOOP-3351. Fixes the history viewer tool to not do huge StringBuffer
+    allocations. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3419. Fixes TestFsck to wait for updates to happen before
+    checking results to make the test more reliable. (Lohit Vijaya
+    Renu via omalley)
+
+    HADOOP-3259. Makes failure to read system properties due to a
+    security manager non-fatal. (Edward Yoon via omalley)
+
+    HADOOP-3451. Update libhdfs to use FileSystem::getFileBlockLocations
+    instead of removed getFileCacheHints. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3401. Update FileBench to set the new
+    "mapred.work.output.dir" property to work post-3041. (cdouglas via omalley)
+
+    HADOOP-2669. DFSClient locks pendingCreates appropriately. (dhruba)
+ 
+    HADOOP-3410. Fix KFS implemenation to return correct file
+    modification time.  (Sriram Rao via cutting)
+
+    HADOOP-3340. Fix DFS metrics for BlocksReplicated, HeartbeatsNum, and
+    BlockReportsAverageTime. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3435. Remove the assuption in the scripts that bash is at
+    /bin/bash and fix the test patch to require bash instead of sh.
+    (Brice Arnould via omalley)
+
+    HADOOP-3471. Fix spurious errors from TestIndexedSort and add additional
+    logging to let failures be reproducible. (cdouglas)
+
+    HADOOP-3443. Avoid copying map output across partitions when renaming a
+    single spill. (omalley via cdouglas)
+
+    HADOOP-3454. Fix Text::find to search only valid byte ranges. (Chad Whipkey
+    via cdouglas)
+
+    HADOOP-3417. Removes the static configuration variable,
+    commandLineConfig from JobClient. Moves the cli parsing from
+    JobShell to GenericOptionsParser.  Thus removes the class
+    org.apache.hadoop.mapred.JobShell.  (Amareshwari Sriramadasu via
+    ddas)
+
+    HADOOP-2132. Only RUNNING/PREP jobs can be killed. (Jothi Padmanabhan 
+    via ddas)
+
+    HADOOP-3476. Code cleanup in fuse-dfs.
+    (Peter Wyckoff via dhruba)
+
+    HADOOP-2427. Ensure that the cwd of completed tasks is cleaned-up
+    correctly on task-completion. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2565. Remove DFSPath cache of FileStatus. 
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3326. Cleanup the local-fs and in-memory merge in the ReduceTask by
+    spawning only one thread each for the on-disk and in-memory merge.
+    (Sharad Agarwal via acmurthy)
+
+    HADOOP-3493. Fix TestStreamingFailure to use FileUtil.fullyDelete to
+    ensure correct cleanup. (Lohit Vijayarenu via acmurthy) 
+
+    HADOOP-3455. Fix NPE in ipc.Client in case of connection failure and
+    improve its synchronization. (hairong)
+
+    HADOOP-3240. Fix a testcase to not create files in the current directory.
+    Instead the file is created in the test directory (Mahadev Konar via ddas)
+
+    HADOOP-3496.  Fix failure in TestHarFileSystem.testArchives due to change
+    in HADOOP-3095.  (tomwhite)
+
+    HADOOP-3135. Get the system directory from the JobTracker instead of from
+    the conf. (Subramaniam Krishnan via ddas)
+
+    HADOOP-3503. Fix a race condition when client and namenode start
+    simultaneous recovery of the same block.  (dhruba & Tsz Wo
+    (Nicholas), SZE)
+
+    HADOOP-3440. Fixes DistributedCache to not create symlinks for paths which
+    don't have fragments even when createSymLink is true. 
+    (Abhijit Bagri via ddas) 
+
+    HADOOP-3463. Hadoop-daemons script should cd to $HADOOP_HOME. (omalley)
+
+    HADOOP-3489. Fix NPE in SafeModeMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3509. Fix NPE in FSNamesystem.close. (Tsz Wo (Nicholas), SZE via 
+    shv)
+
+    HADOOP-3491. Name-node shutdown causes InterruptedException in 
+    ResolutionMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3511. Fixes namenode image to not set the root's quota to an
+    invalid value when the quota was not saved in the image. (hairong)
+
+    HADOOP-3516. Ensure the JobClient in HadoopArchives is initialized
+    with a configuration. (Subramaniam Krishnan via omalley)
+
+    HADOOP-3513. Improve NNThroughputBenchmark log messages. (shv)
+
+    HADOOP-3519.  Fix NPE in DFS FileSystem rename.  (hairong via tomwhite)
+    
+    HADOOP-3528. The FilesCreated and files_deleted metrics
+    do not match. (Lohit via Mahadev)
+
+    HADOOP-3418. When a directory is deleted, any leases that point to files
+    in the subdirectory are removed. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3542. Disables the creation of _logs directory for the archives
+    directory. (Mahadev Konar via ddas)
+
+    HADOOP-3544. Fixes a documentation issue for hadoop archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3517. Fixes a problem in the reducer due to which the last InMemory
+    merge may be missed. (Arun Murthy via ddas)
+
+    HADOOP-3548. Fixes build.xml to copy all *.jar files to the dist.
+    (Owen O'Malley via ddas)
+
+    HADOOP-3363. Fix unformatted storage detection in FSImage. (shv)
+
+    HADOOP-3560. Fixes a problem to do with split creation in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3545. Fixes an overflow problem in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3561. Prevent the trash from deleting its parent directories.
+    (cdouglas)
+
+    HADOOP-3575. Fix the clover ant target after package refactoring.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3539.  Fix the tool path in the bin/hadoop script under
+    cygwin. (Tsz Wo (Nicholas), Sze via omalley)
+
+    HADOOP-3520.  TestDFSUpgradeFromImage triggers a race condition in the
+    Upgrade Manager. Fixed. (dhruba)
+
+    HADOOP-3586. Provide deprecated, backwards-compatible semantics for the
+    combiner to be run once and only once on each record. (cdouglas)
+
+    HADOOP-3533. Add deprecated methods to provide API compatibility
+    between 0.18 and 0.17. Remove the deprecated methods in trunk. (omalley)
+
+    HADOOP-3580. Fixes a problem to do with specifying a har as an input to 
+    a job. (Mahadev Konar via ddas)
+
+    HADOOP-3333. Don't assign a task to a tasktracker that it failed to  
+    execute earlier (used to happen in the case of lost tasktrackers where
+    the tasktracker would reinitialize and bind to a different port). 
+    (Jothi Padmanabhan and Arun Murthy via ddas)
+
+    HADOOP-3534. Log IOExceptions that happen in closing the name
+    system when the NameNode shuts down. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3546. TaskTracker re-initialization gets stuck in cleaning up.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3576. Fix NullPointerException when renaming a directory
+    to its subdirectory. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3320. Fix NullPointerException in NetworkTopology.getDistance().
+    (hairong)
+
+    HADOOP-3569. KFS input stream read() now correctly reads 1 byte
+    instead of 4. (Sriram Rao via omalley)
+
+    HADOOP-3599. Fix JobConf::setCombineOnceOnly to modify the instance rather
+    than a parameter. (Owen O'Malley via cdouglas)
+
+    HADOOP-3590. Null pointer exception in JobTracker when the task tracker is 
+    not yet resolved. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3603. Fix MapOutputCollector to spill when io.sort.spill.percent is
+    1.0 and to detect spills when emitted records write no data. (cdouglas)
+
+    HADOOP-3615. Set DatanodeProtocol.versionID to the correct value.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3559. Fix the libhdfs test script and config to work with the
+    current semantics. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3480.  Need to update Eclipse template to reflect current trunk.
+    (Brice Arnould via tomwhite)
+  
+    HADOOP-3588. Fixed usability issues with archives. (mahadev)
+
+    HADOOP-3635. Uncaught exception in DataBlockScanner.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3639. Exception when closing DFSClient while multiple files are
+    open. (Benjamin Gufler via hairong)
+
+    HADOOP-3572. SetQuotas usage interface has some minor bugs. (hairong)
+
+    HADOOP-3649. Fix bug in removing blocks from the corrupted block map.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3604. Work around a JVM synchronization problem observed while
+    retrieving the address of direct buffers from compression code by obtaining
+    a lock during this call. (Arun C Murthy via cdouglas)
+
+    HADOOP-3683. Fix dfs metrics to count file listings rather than files
+    listed. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3597. Fix SortValidator to use filesystems other than the default as
+    input. Validation job still runs on default fs.
+    (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-3693. Fix archives, distcp and native library documentation to
+    conform to style guidelines. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-3653. Fix test-patch target to properly account for Eclipse
+    classpath jars. (Brice Arnould via nigel)
+
+    HADOOP-3692. Fix documentation for Cluster setup and Quick start guides. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3691. Fix streaming and tutorial docs. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3630. Fix NullPointerException in CompositeRecordReader from empty
+    sources (cdouglas)
+
+    HADOOP-3706. Fix a ClassLoader issue in the mapred.join Parser that
+    prevents it from loading user-specified InputFormats.
+    (Jingkei Ly via cdouglas)
+
+    HADOOP-3718. Fix KFSOutputStream::write(int) to output a byte instead of
+    an int, per the OutputStream contract. (Sriram Rao via cdouglas)
+
+    HADOOP-3647. Add debug logs to help track down a very occasional,
+    hard-to-reproduce, bug in shuffle/merge on the reducer. (acmurthy) 
+
+    HADOOP-3716. Prevent listStatus in KosmosFileSystem from returning
+    null for valid, empty directories. (Sriram Rao via cdouglas)
+
+    HADOOP-3752. Fix audit logging to record rename events. (cdouglas)
+
+    HADOOP-3737. Fix CompressedWritable to call Deflater::end to release
+    compressor memory. (Grant Glouser via cdouglas)
+
+    HADOOP-3670. Fixes JobTracker to clear out split bytes when no longer 
+    required. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3755. Update gridmix to work with HOD 0.4 (Runping Qi via cdouglas)
+  
+    HADOOP-3743. Fix -libjars, -files, -archives options to work even if 
+    user code does not implement tools. (Amareshwari Sriramadasu via mahadev)
+
+    HADOOP-3774. Fix typos in shell output. (Tsz Wo (Nicholas), SZE via
+    cdouglas)
+
+    HADOOP-3762. Fixed FileSystem cache to work with the default port. (cutting
+    via omalley)
+
+    HADOOP-3798. Fix tests compilation. (Mukund Madhugiri via omalley)
+
+    HADOOP-3794. Return modification time instead of zero for KosmosFileSystem.
+    (Sriram Rao via cdouglas)
+
+    HADOOP-3806. Remove debug statement to stdout from QuickSort. (cdouglas)
+
+    HADOOP-3776. Fix NPE at NameNode when datanode reports a block after it is
+    deleted at NameNode. (rangadi)
+
+    HADOOP-3537. Disallow adding a datanode to a network topology when its
+    network location is not resolved. (hairong)
+
+    HADOOP-3571. Fix bug in block removal used in lease recovery. (shv)
+
+    HADOOP-3645. MetricsTimeVaryingRate returns wrong value for
+    metric_avg_time. (Lohit Vijayarenu via hairong)
+
+    HADOOP-3521. Restored the cast to float, removed by HADOOP-544, when sending
+    Counters' values to Hadoop metrics. (acmurthy)
+
+    HADOOP-3820. Fixes two problems in the gridmix-env - a syntax error, and a 
+    wrong definition of USE_REAL_DATASET by default. (Arun Murthy via ddas)
+
+    HADOOP-3724. Fixes two problems related to storing and recovering lease
+    in the fsimage. (dhruba)
+    
+    HADOOP-3827.  Fixed compression of empty map-outputs. (acmurthy) 
+
+    HADOOP-3865. Remove reference to FSNamesystem from metrics preventing
+    garbage collection. (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-3884.  Fix so that Eclipse plugin builds against recent
+    Eclipse releases.  (cutting)
+
+    HADOOP-3837. Streaming jobs report progress status. (dhruba)
+
+    HADOOP-3897. Fix a NPE in secondary namenode. (Lohit Vijayarenu via 
+    cdouglas)
+
+    HADOOP-3901. Fix bin/hadoop to correctly set classpath under cygwin.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3947. Fix a problem in tasktracker reinitialization. 
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.17.3 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-4164. Chinese translation of the documentation. (Xuebing Yan via 
+    omalley)
+
+  BUG FIXES
+
+    HADOOP-4277. Checksum verification was mistakenly disabled for
+    LocalFileSystem. (Raghu Angadi)
+
+    HADOOP-4271. Checksum input stream can sometimes return invalid 
+    data to the user. (Ning Li via rangadi)
+
+    HADOOP-4318. DistCp should use absolute paths for cleanup.  (szetszwo)
+
+    HADOOP-4326. ChecksumFileSystem does not override create(...) correctly.
+    (szetszwo)
+
+Release 0.17.2 - 2008-08-11
+
+  BUG FIXES
+
+    HADOOP-3678. Avoid spurious exceptions logged at DataNode when clients
+    read from DFS. (rangadi)
+
+    HADOOP-3707. NameNode keeps a count of number of blocks scheduled
+    to be written to a datanode and uses it to avoid allocating more
+    blocks than a datanode can hold. (rangadi)
+
+    HADOOP-3760. Fix a bug with HDFS file close() mistakenly introduced
+    by HADOOP-3681. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3681. DFSClient can get into an infinite loop while closing
+    a file if there are some errors. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3002. Hold off block removal while in safe mode. (shv)
+
+    HADOOP-3685. Unbalanced replication target. (hairong)
+
+    HADOOP-3758. Shutdown datanode on version mismatch instead of retrying
+    continuously, preventing excessive logging at the namenode.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3633. Correct exception handling in DataXceiveServer, and throttle
+    the number of xceiver threads in a data-node. (shv)
+
+    HADOOP-3370. Ensure that the TaskTracker.runningJobs data-structure is
+    correctly cleaned-up on task completion. (Zheng Shao via acmurthy) 
+
+    HADOOP-3813. Fix task-output clean-up on HDFS to use the recursive 
+    FileSystem.delete rather than the FileUtil.fullyDelete. (Amareshwari
+    Sri Ramadasu via acmurthy)  
+
+    HADOOP-3859. Allow the maximum number of xceivers in the data node to
+    be configurable. (Johan Oskarsson via omalley)
+
+    HADOOP-3931. Fix corner case in the map-side sort that causes some values 
+    to be counted as too large, causing premature spills to disk. Some values
+    will also bypass the combiner incorrectly. (cdouglas via omalley)
+
+Release 0.17.1 - 2008-06-23
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3565. Fix the Java serialization, which is not enabled by
+    default, to clear the state of the serializer between objects.
+    (tomwhite via omalley)
+
+  IMPROVEMENTS
+
+    HADOOP-3522. Improve documentation on reduce pointing out that
+    input keys and values will be reused. (omalley)
+
+    HADOOP-3487. Balancer uses thread pools for managing its threads;
+    therefore provides better resource management. (hairong)
+
+  BUG FIXES
+
+    HADOOP-2159. Namenode stuck in safemode. The counter blockSafe should
+    not be decremented for invalid blocks. (hairong)
+
+    HADOOP-3472 MapFile.Reader getClosest() function returns incorrect results
+    when before is true (Todd Lipcon via Stack)
+
+    HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent
+    StackOverflowErrors. To avoid O(n*n) cases, when partitioning depth exceeds
+    a multiple of log(n), change to HeapSort. (cdouglas)
+
+    HADOOP-3477. Fix build to not package contrib/*/bin twice in
+    distributions.  (Adam Heath via cutting)
+
+    HADOOP-3475. Fix MapTask to correctly size the accounting allocation of
+    io.sort.mb. (cdouglas)
+
+    HADOOP-3550. Fix the serialization data structures in MapTask where the
+    value lengths are incorrectly calculated. (cdouglas)
+
+    HADOOP-3526. Fix contrib/data_join framework by cloning values retained
+    in the reduce. (Spyros Blanas via cdouglas)
+
+    HADOOP-1979. Speed up fsck by adding a buffered stream. (Lohit
+    Vijaya Renu via omalley)
+
+Release 0.17.0 - 2008-05-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2786.  Move hbase out of hadoop core
+
+    HADOOP-2345.  New HDFS transactions to support appending 
+    to files.  Disk layout version changed from -11 to -12. (dhruba)
+
+    HADOOP-2192. Error messages from "dfs mv" command improved.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-1902. "dfs du" command without any arguments operates on the
+    current working directory.  (Mahadev Konar via dhruba)
+
+    HADOOP-2873.  Fixed bad disk format introduced by HADOOP-2345.
+    Disk layout version changed from -12 to -13. See changelist 630992
+    (dhruba)
+
+    HADOOP-1985.  This addresses rack-awareness for Map tasks and for 
+    HDFS in a uniform way. (ddas)
+
+    HADOOP-1986.  Add support for a general serialization mechanism for
+    Map Reduce. (tomwhite)
+
+    HADOOP-771. FileSystem.delete() takes an explicit parameter that
+    specifies whether a recursive delete is intended.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2470. Remove getContentLength(String), open(String, long, long)
+    and isDir(String) from ClientProtocol. ClientProtocol version changed
+    from 26 to 27. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2822. Remove deprecated code for classes InputFormatBase and 
+    PhasedFileSystem. (Amareshwari Sriramadasu via enis)
+
+    HADOOP-2116. Changes the layout of the task execution directory. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2828. The following deprecated methods in Configuration.java
+    have been removed
+        getObject(String name)
+        setObject(String name, Object value)
+        get(String name, Object defaultValue)
+        set(String name, Object value)
+        Iterator entries()
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2824. Removes one deprecated constructor from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2823. Removes deprecated methods getColumn(), getLine() from
+    org.apache.hadoop.record.compiler.generated.SimpleCharStream. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3060. Removes one unused constructor argument from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2854. Remove deprecated o.a.h.ipc.Server::getUserInfo().
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2563. Remove deprecated FileSystem::listPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2818.  Remove deprecated methods in Counters.
+    (Amareshwari Sriramadasu via tomwhite)
+
+    HADOOP-2831. Remove deprecated o.a.h.dfs.INode::getAbsoluteName()
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2839. Remove deprecated FileSystem::globPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2634. Deprecate ClientProtocol::exists.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2410.  Make EC2 cluster nodes more independent of each other.
+    Multiple concurrent EC2 clusters are now supported, and nodes may be
+    added to a cluster on the fly with new nodes starting in the same EC2
+    availability zone as the cluster.  Ganglia monitoring and large
+    instance sizes have also been added.  (Chris K Wensel via tomwhite)
+
+    HADOOP-2826. Deprecated FileSplit.getFile(), LineRecordReader.readLine().
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3239. getFileInfo() returns null for non-existing files instead
+    of throwing FileNotFoundException. (Lohit Vijayarenu via shv)
+
+    HADOOP-3266. Removed HOD changes from CHANGES.txt, as they are now inside 
+    src/contrib/hod  (Hemanth Yamijala via ddas)
+
+    HADOOP-3280. Separate the configuration of the virtual memory size
+    (mapred.child.ulimit) from the jvm heap size, so that 64 bit
+    streaming applications are supported even when running with 32 bit
+    jvms. (acmurthy via omalley)
+
+  NEW FEATURES
+
+    HADOOP-1398.  Add HBase in-memory block cache.  (tomwhite)
+
+    HADOOP-2178.  Job History on DFS. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2063. A new parameter to dfs -get command to fetch a file 
+    even if it is corrupted.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2219. A new command "dfs -count" that counts the number of
+    files and directories.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2906. Add an OutputFormat capable of using keys, values, and
+    config params to map records to different output files.
+    (Runping Qi via cdouglas)
+
+    HADOOP-2346. Utilities to support timeout while writing to sockets.
+    DFSClient and DataNode sockets have 10min write timeout. (rangadi)
+    
+    HADOOP-2951.  Add a contrib module that provides a utility to
+    build or update Lucene indexes using Map/Reduce.  (Ning Li via cutting)
+
+    HADOOP-1622.  Allow multiple jar files for map reduce.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2055. Allows users to set PathFilter on the FileInputFormat.
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-2551. More environment variables like HADOOP_NAMENODE_OPTS
+    for better control of HADOOP_OPTS for each component. (rangadi)
+
+    HADOOP-3001. Add job counters that measure the number of bytes
+    read and written to HDFS, S3, KFS, and local file systems. (omalley)
+
+    HADOOP-3048.  A new Interface and a default implementation to convert 
+    and restore serializations of objects to/from strings. (enis)
+
+  IMPROVEMENTS
+
+    HADOOP-2655. Copy on write for data and metadata files in the 
+    presence of snapshots. Needed for supporting appends to HDFS
+    files. (dhruba) 
+
+    HADOOP-1967.  When a Path specifies the same scheme as the default
+    FileSystem but no authority, the default FileSystem's authority is
+    used.  Also add warnings for old-format FileSystem names, accessor
+    methods for fs.default.name, and check for null authority in HDFS.
+    (cutting)
+
+    HADOOP-2895. Let the profiling string be configurable.
+    (Martin Traverso via cdouglas)
+
+    HADOOP-910. Enables Reduces to do merges for the on-disk map output files 
+    in parallel with their copying. (Amar Kamat via ddas)
+
+    HADOOP-730. Use rename rather than copy for local renames. (cdouglas)
+
+    HADOOP-2810. Updated the Hadoop Core logo. (nigel)
+
+    HADOOP-2057.  Streaming should optionally treat a non-zero exit status
+    of a child process as a failed task.  (Rick Cox via tomwhite)
+
+    HADOOP-2765. Enables specifying ulimits for streaming/pipes tasks (ddas)
+
+    HADOOP-2888. Make gridmix scripts more readily configurable and amenable
+    to automated execution. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2908.  A document that describes the DFS Shell command. 
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2981.  Update README.txt to reflect the upcoming use of
+    cryptography. (omalley)
+
+    HADOOP-2804.  Add support to publish CHANGES.txt as HTML when running
+    the Ant 'docs' target. (nigel)
+
+    HADOOP-2559. Change DFS block placement to allocate the first replica
+    locally, the second off-rack, and the third intra-rack from the
+    second. (lohit vijayarenu via cdouglas)
+
+    HADOOP-2939. Make the automated patch testing process an executable 
+    Ant target, test-patch. (nigel)
+
+    HADOOP-2239. Add HsftpFileSystem to permit transferring files over ssl.
+    (cdouglas)
+
+    HADOOP-2886.  Track individual RPC metrics.
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2373. Improvement in safe-mode reporting. (shv)
+
+    HADOOP-3091. Modify FsShell command -put to accept multiple sources.
+    (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3092. Show counter values from job -status command.
+    (Tom White via ddas)
+
+    HADOOP-1228.  Ant task to generate Eclipse project files.  (tomwhite)
+
+    HADOOP-3093. Adds Configuration.getStrings(name, default-value) and
+    the corresponding setStrings. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3106. Adds documentation in forrest for debugging.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3099. Add an option to distcp to preserve user, group, and
+    permission information. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2841. Unwrap AccessControlException and FileNotFoundException
+    from RemoteException for DFSClient. (shv)
+
+    HADOOP-3152.  Make index interval configurable when using
+    MapFileOutputFormat for map-reduce job.  (Rong-En Fan via cutting)
+
+    HADOOP-3143. Decrease number of slaves from 4 to 3 in TestMiniMRDFSSort,
+    as Hudson generates false negatives under the current load.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3174. Illustrative example for MultipleFileInputFormat. (Enis
+    Soztutar via acmurthy)  
+
+    HADOOP-2993. Clarify the usage of JAVA_HOME in the Quick Start guide.
+    (acmurthy via nigel)
+
+    HADOOP-3124. Make DataNode socket write timeout configurable. (rangadi)
+
+  OPTIMIZATIONS
+
+    HADOOP-2790.  Fixed inefficient method hasSpeculativeTask by removing
+    repetitive calls to get the current time and late checking to see if
+    we want speculation on at all. (omalley)
+
+    HADOOP-2758. Reduce buffer copies in DataNode when data is read from
+    HDFS, without negatively affecting read throughput. (rangadi)
+
+    HADOOP-2399. Input key and value to combiner and reducer is reused.
+    (Owen O'Malley via ddas). 
+
+    HADOOP-2423.  Code optimization in FSNamesystem.mkdirs.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2606. ReplicationMonitor selects data-nodes to replicate directly
+    from needed replication blocks instead of looking up for the blocks for 
+    each live data-node. (shv)
+
+    HADOOP-2148. Eliminate redundant data-node blockMap lookups. (shv)
+
+    HADOOP-2027. Return the number of bytes in each block in a file
+    via a single rpc to the namenode to speed up job planning. 
+    (Lohit Vijaya Renu via omalley)
+
+    HADOOP-2902.  Replace uses of "fs.default.name" with calls to the
+    accessor methods added in HADOOP-1967.  (cutting)
+
+    HADOOP-2119.  Optimize scheduling of jobs with large numbers of
+    tasks by replacing static arrays with lists of runnable tasks. 
+    (Amar Kamat via omalley)
+
+    HADOOP-2919.  Reduce the number of memory copies done during the
+    map output sorting. Also adds two config variables:
+    io.sort.spill.percent - the percentages of io.sort.mb that should
+                            cause a spill (default 80%)
+    io.sort.record.percent - the percent of io.sort.mb that should
+                             hold key/value indexes (default 5%)
+    (cdouglas via omalley)
+
+    HADOOP-3140. Doesn't add a task in the commit queue if the task hadn't
+    generated any output. (Amar Kamat via ddas)
+
+    HADOOP-3168. Reduce the amount of logging in streaming to an
+    exponentially increasing number of records (up to 10,000
+    records/log). (Zheng Shao via omalley)
+ 
+  BUG FIXES
+
+    HADOOP-2195. '-mkdir' behaviour is now closer to Linux shell in case of
+    errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2190. bring behaviour '-ls' and '-du' closer to Linux shell 
+    commands in case of errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2193. 'fs -rm' and 'fs -rmr' show error message when the target
+    file does not exist. (Mahadev Konar via rangadi)
+            
+    HADOOP-2738 Text is not subclassable because set(Text) and compareTo(Object)
+    access the other instance's private members directly. (jimk)
+
+    HADOOP-2779.  Remove the references to HBase in the build.xml. (omalley)
+
+    HADOOP-2194. dfs cat on a non-existent file throws FileNotFoundException.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2767. Fix for NetworkTopology erroneously skipping the last leaf 
+    node on a rack. (Hairong Kuang and Mark Butler via dhruba)
+
+    HADOOP-1593. FsShell works with paths in non-default FileSystem.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2191. du and dus command on non-existent directory gives 
+    appropriate error message.  (Mahadev Konar via dhruba)
+
+    HADOOP-2832. Remove tabs from code of DFSClient for better
+    indentation. (dhruba)
+
+    HADOOP-2844. distcp closes file handles for sequence files.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2727. Fix links in Web UI of the hadoop daemons and some docs
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2871. Fixes a problem to do with file: URI in the JobHistory init.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2800.  Deprecate SetFile.Writer constructor not the whole class.
+    (Johan Oskarsson via tomwhite)
+
+    HADOOP-2891.  DFSClient.close() closes all open files. (dhruba)
+
+    HADOOP-2845.  Fix dfsadmin disk utilization report on Solaris.
+    (Martin Traverso via tomwhite)
+
+    HADOOP-2912. MiniDFSCluster restart should wait for namenode to exit
+    safemode. This was causing TestFsck to fail.  (Mahadev Konar via dhruba)
+
+    HADOOP-2820. The following classes in streaming are removed : 
+    StreamLineRecordReader StreamOutputFormat StreamSequenceRecordReader.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2819. The following methods in JobConf are removed:
+    getInputKeyClass() setInputKeyClass getInputValueClass()
+    setInputValueClass(Class theClass) setSpeculativeExecution
+    getSpeculativeExecution() (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2817. Removes deprecated mapred.tasktracker.tasks.maximum and 
+    ClusterStatus.getMaxTasks(). (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2821. Removes deprecated ShellUtil and ToolBase classes from
+    the util package. (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2934. The namenode was encountering an NPE while loading
+    leases from the fsimage. Fixed. (dhruba)
+
+    HADOOP-2938. Some fs commands did not glob paths.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-2943. Compression of intermediate map output causes failures
+    in the merge. (cdouglas)
+
+    HADOOP-2870.  DataNode and NameNode closes all connections while
+    shutting down. (Hairong Kuang via dhruba)
+
+    HADOOP-2973. Fix TestLocalDFS for Windows platform.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2971. select multiple times if it returns early in 
+    SocketIOWithTimeout. (rangadi)
+
+    HADOOP-2955. Fix TestCrcCorruption test failures caused by HADOOP-2758
+    (rangadi)
+
+    HADOOP-2657. A flush call on the DFSOutputStream flushes the last
+    partial CRC chunk too.  (dhruba)
+
+    HADOOP-2974. IPC unit tests used "0.0.0.0" to connect to server, which
+    is not always supported. (rangadi)
+
+    HADOOP-2996. Fixes uses of StringBuffer in StreamUtils class.
+    (Dave Brosius via ddas)
+
+    HADOOP-2995. Fixes StreamBaseRecordReader's getProgress to return a 
+    floating point number. (Dave Brosius via ddas)
+
+    HADOOP-2972. Fix for an NPE in FSDataset.invalidate.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2994. Code cleanup for DFSClient: remove redundant 
+    conversions from string to string.  (Dave Brosius via dhruba)
+
+    HADOOP-3009. TestFileCreation sometimes fails because restarting
+    minidfscluster sometimes creates datanodes with ports that are
+    different from their original instance. (dhruba)
+
+    HADOOP-2992. Distributed Upgrade framework works correctly with
+    more than one upgrade object.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2679. Fix a typo in libhdfs.  (Jason via dhruba)
+
+    HADOOP-2976. When a lease expires, the Namenode ensures that 
+    blocks of the file are adequately replicated. (dhruba)
+
+    HADOOP-2901. Fixes the creation of info servers in the JobClient
+    and JobTracker. Removes the creation from JobClient and removes
+    additional info server from the JobTracker. Also adds the command
+    line utility to view the history files (HADOOP-2896), and fixes
+    bugs in JSPs to do with analysis - HADOOP-2742, HADOOP-2792.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2890. If different datanodes report the same block but
+    with different sizes to the namenode, the namenode picks the
+    replica(s) with the largest size as the only valid replica(s). (dhruba)
+
+    HADOOP-2825. Deprecated MapOutputLocation.getFile() is removed.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2806. Fixes a streaming document.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3008. SocketIOWithTimeout throws InterruptedIOException if the
+    thread is interrupted while it is waiting. (rangadi)
+    
+    HADOOP-3006. Fix wrong packet size reported by DataNode when a block
+    is being replicated. (rangadi)
+
+    HADOOP-3029. Datanode prints log message "firstbadlink" only if 
+    it detects a bad connection to another datanode in the pipeline. (dhruba)
+
+    HADOOP-3030. Release reserved space for file in InMemoryFileSystem if
+    checksum reservation fails. (Devaraj Das via cdouglas)
+
+    HADOOP-3036. Fix findbugs warnings in UpgradeUtilities. (Konstantin
+    Shvachko via cdouglas)
+
+    HADOOP-3025. ChecksumFileSystem supports the delete method with 
+    the recursive flag. (Mahadev Konar via dhruba)
+
+    HADOOP-3012. dfs -mv file to user home directory throws exception if 
+    the user home directory does not exist. (Mahadev Konar via dhruba)
+    
+    HADOOP-3066. Should not require superuser privilege to query if hdfs is in
+    safe mode (jimk)
+
+    HADOOP-3040. If the input line starts with the separator char, the key
+    is set as empty. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3080. Removes flush calls from JobHistory.
+    (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3086. Adds the testcase missed during commit of hadoop-3040.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3046. Fix the raw comparators for Text and BytesWritables
+    to use the provided length rather than recompute it. (omalley)
+
+    HADOOP-3094. Fix BytesWritable.toString to avoid extending the sign bit
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-3067. DFSInputStream's position read does not close the sockets.
+    (rangadi)
+
+    HADOOP-3073. close() on SocketInputStream or SocketOutputStream should
+    close the underlying channel. (rangadi)
+
+    HADOOP-3087. Fixes a problem to do with refreshing of loadHistory.jsp.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3065. Better logging message if the rack location of a datanode
+    cannot be determined.  (Devaraj Das via dhruba)
+
+    HADOOP-3064. Commas in a file path should not be treated as delimiters.
+    (Hairong Kuang via shv)
+
+    HADOOP-2997. Adds test for non-writable serialier. Also fixes a problem 
+    introduced by HADOOP-2399. (Tom White via ddas)
+
+    HADOOP-3114. Fix TestDFSShell on Windows. (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3118.  Fix Namenode NPE while loading fsimage after a cluster 
+    upgrade from older disk format. (dhruba)
+
+    HADOOP-3161. Fix FileUtil.HardLink.getLinkCount on Mac OS. (nigel
+    via omalley)
+
+    HADOOP-2927. Fix TestDU to accurately calculate the expected file size.
+    (shv via nigel)
+
+    HADOOP-3123. Fix the native library build scripts to work on Solaris.
+    (tomwhite via omalley)
+
+    HADOOP-3089.  Streaming should accept stderr from task before
+    first key arrives.  (Rick Cox via tomwhite)
+
+    HADOOP-3146. A DFSOutputStream.flush method is renamed as
+    DFSOutputStream.fsync.  (dhruba)
+
+    HADOOP-3165. -put/-copyFromLocal did not treat input file "-" as stdin.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3041. Deprecate JobConf.setOutputPath and JobConf.getOutputPath.
+    Deprecate OutputFormatBase. Add FileOutputFormat. Existing output formats
+    extending OutputFormatBase, now extend FileOutputFormat. Add the following
+    APIs in FileOutputFormat: setOutputPath, getOutputPath, getWorkOutputPath.
+    (Amareshwari Sriramadasu via nigel)
+
+    HADOOP-3083. The fsimage does not store leases. This would have to be
+    reworked in the next release to support appends. (dhruba)
+
+    HADOOP-3166. Fix an ArrayIndexOutOfBoundsException in the spill thread
+    and make exception handling more promiscuous to catch this condition.
+    (cdouglas)
+
+    HADOOP-3050. DataNode sends one and only one block report after
+    it registers with the namenode. (Hairong Kuang)
+
+    HADOOP-3044. NNBench sets the right configuration for the mapper.
+    (Hairong Kuang)
+
+    HADOOP-3178. Fix GridMix scripts for small and medium jobs
+    to handle input paths differently. (Mukund Madhugiri via nigel)
+
+    HADOOP-1911. Fix an infinite loop in DFSClient when all replicas of a
+    block are bad (cdouglas)
+
+    HADOOP-3157. Fix path handling in DistributedCache and TestMiniMRLocalFS.
+    (Doug Cutting via rangadi) 
+
+    HADOOP-3018. Fix the eclipse plug-in contrib wrt removed deprecated
+    methods (taton)
+
+    HADOOP-3183. Fix TestJobShell to use 'ls' instead of java.io.File::exists
+    since cygwin symlinks are unsupported.
+    (Mahadev konar via cdouglas)
+
+    HADOOP-3175. Fix FsShell.CommandFormat to handle "-" in arguments.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3220. Safemode message corrected. (shv)
+
+    HADOOP-3208. Fix WritableDeserializer to set the Configuration on
+    deserialized Writables. (Enis Soztutar via cdouglas)
+
+    HADOOP-3224. 'dfs -du /dir' does not return correct size.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3223. Fix typo in help message for -chmod. (rangadi)
+
+    HADOOP-1373. checkPath() should ignore case when it compares authority.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3204. Fixes a problem to do with ReduceTask's LocalFSMerger not
+    catching Throwable.  (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3229. Report progress when collecting records from the mapper and
+    the combiner. (Doug Cutting via cdouglas)
+
+    HADOOP-3225. Unwrapping methods of RemoteException should initialize
+    detailedMassage field. (Mahadev Konar, shv, cdouglas)
+
+    HADOOP-3247. Fix gridmix scripts to use the correct globbing syntax and
+    change maxentToSameCluster to run the correct number of jobs.
+    (Runping Qi via cdouglas)
+
+    HADOOP-3242. Fix the RecordReader of SequenceFileAsBinaryInputFormat to
+    correctly read from the start of the split and not the beginning of the
+    file. (cdouglas via acmurthy) 
+
+    HADOOP-3256. Encodes the job name used in the filename for history files.
+    (Arun Murthy via ddas)
+
+    HADOOP-3162. Ensure that comma-separated input paths are treated correctly
+    as multiple input paths. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-3263. Ensure that the job-history log file always follows the
+    pattern of hostname_timestamp_jobid_username_jobname even if username
+    and/or jobname are not specified. This helps to avoid wrong assumptions
+    made about the job-history log filename in jobhistory.jsp. (acmurthy) 
+
+    HADOOP-3251. Fixes getFilesystemName in JobTracker and LocalJobRunner to
+    use FileSystem.getUri instead of FileSystem.getName. (Arun Murthy via ddas)
+
+    HADOOP-3237. Fixes TestDFSShell.testErrOutPut on Windows platform.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3279. TaskTracker checks for SUCCEEDED task status in addition to 
+    COMMIT_PENDING status when it fails maps due to lost map.
+    (Devaraj Das)
+
+    HADOOP-3286. Prevent collisions in gridmix output dirs by increasing the
+    granularity of the timestamp. (Runping Qi via cdouglas)
+
+    HADOOP-3285. Fix input split locality when the splits align to
+    fs blocks. (omalley)
+
+    HADOOP-3372. Fix heap management in streaming tests. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3031. Fix javac warnings in test classes. (cdouglas)
+
+    HADOOP-3382. Fix memory leak when files are not cleanly closed (rangadi)
+
+    HADOOP-3322. Fix to push MetricsRecord for rpc metrics. (Eric Yang via
+    mukund)
+
+Release 0.16.4 - 2008-05-05
+
+  BUG FIXES
+
+    HADOOP-3138. DFS mkdirs() should not throw an exception if the directory
+    already exists. (rangadi via mukund)
+
+    HADOOP-3294. Fix distcp to check the destination length and retry the copy
+    if it doesn't match the src length. (Tsz Wo (Nicholas), SZE via mukund)
+
+    HADOOP-3186. Fix incorrect permission checking for mv and renameTo
+    in HDFS. (Tsz Wo (Nicholas), SZE via mukund)
+
+Release 0.16.3 - 2008-04-16
+
+  BUG FIXES
+
+    HADOOP-3010. Fix ConcurrentModificationException in ipc.Server.Responder.
+    (rangadi)
+
+    HADOOP-3154. Catch all Throwables from the SpillThread in MapTask, rather
+    than IOExceptions only. (ddas via cdouglas)
+
+    HADOOP-3159. Avoid file system cache being overwritten whenever
+    configuration is modified. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3139. Remove the consistency check for the FileSystem cache in
+    closeAll() that causes spurious warnings and a deadlock.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3195. Fix TestFileSystem to be deterministic.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3069. Primary name-node should not truncate image when transferring
+    it from the secondary. (shv)
+
+    HADOOP-3182. Change permissions of the job-submission directory to 777
+    from 733 to ensure sharing of HOD clusters works correctly. (Tsz Wo
+    (Nicholas), Sze and Amareshwari Sri Ramadasu via acmurthy) 
+
+Release 0.16.2 - 2008-04-02
+
+  BUG FIXES
+
+    HADOOP-3011. Prohibit distcp from overwriting directories on the
+    destination filesystem with files. (cdouglas)
+
+    HADOOP-3033. The BlockReceiver thread in the datanode writes data to 
+    the block file, changes file position (if needed) and flushes all by
+    itself. The PacketResponder thread does not flush block file. (dhruba)
+
+    HADOOP-2978. Fixes the JobHistory log format for counters.
+    (Runping Qi via ddas)
+
+    HADOOP-2985. Fixes LocalJobRunner to tolerate null job output path.
+    Also makes the _temporary a constant in MRConstants.java.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3003. FileSystem cache key is updated after a 
+    FileSystem object is created. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3042. Updates the Javadoc in JobConf.getOutputPath to reflect 
+    the actual temporary path. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3007. Tolerate mirror failures while DataNode is replicating
+    blocks as it used to before. (rangadi)
+
+    HADOOP-2944. Fixes a "Run on Hadoop" wizard NPE when creating a
+    Location from the wizard. (taton)
+
+    HADOOP-3049. Fixes a problem in MultiThreadedMapRunner to do with
+    catching RuntimeExceptions. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3039. Fixes a problem to do with exceptions in tasks not
+    killing jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3027. Fixes a problem to do with adding a shutdown hook in
+    FileSystem.  (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3056. Fix distcp when the target is an empty directory by
+    making sure the directory is created first. (cdouglas and acmurthy 
+    via omalley)
+
+    HADOOP-3070. Protect the trash emptier thread from null pointer
+    exceptions. (Koji Noguchi via omalley)
+
+    HADOOP-3084. Fix HftpFileSystem to work for zero-length files.
+    (cdouglas)
+
+    HADOOP-3107. Fix NPE when fsck invokes getListings. (dhruba)
+
+    HADOOP-3104. Limit MultithreadedMapRunner to have a fixed length queue
+    between the RecordReader and the map threads. (Alejandro Abdelnur via
+    omalley)
+
+    HADOOP-2833. Do not use "Dr. Who" as the default user in JobClient. 
+    A valid user name is required. (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3128. Throw RemoteException in setPermissions and setOwner of 
+    DistributedFileSystem.  (shv via nigel)
+
+Release 0.16.1 - 2008-03-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2869. Deprecate SequenceFile.setCompressionType in favor of
+    SequenceFile.createWriter, SequenceFileOutputFormat.setCompressionType,
+    and JobConf.setMapOutputCompressionType. (Arun C Murthy via cdouglas)
+    Configuration changes to hadoop-default.xml:
+      deprecated io.seqfile.compression.type
+
+  IMPROVEMENTS
+
+    HADOOP-2371. User guide for file permissions in HDFS.
+    (Robert Chansler via rangadi)
+
+    HADOOP-3098. Allow more characters in user and group names while
+    using -chown and -chgrp commands. (rangadi)
+    
+  BUG FIXES
+
+    HADOOP-2789. Race condition in IPC Server Responder that could close
+    connections early. (Raghu Angadi)
+    
+    HADOOP-2785. minor. Fix a typo in Datanode block verification 
+    (Raghu Angadi)
+    
+    HADOOP-2788. minor. Fix help message for chgrp shell command (Raghu Angadi).
+    
+    HADOOP-1188. fstime file is updated when a storage directory containing
+    namespace image becomes inaccessible. (shv)
+
+    HADOOP-2787. An application can set a configuration variable named
+    dfs.umask to set the umask that is used by DFS.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2780. The default socket buffer size for DataNodes is 128K.
+    (dhruba)
+
+    HADOOP-2716. Superuser privileges for the Balancer.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2754. Filter out .crc files from local file system listing.
+    (Hairong Kuang via shv)
+
+    HADOOP-2733. Fix compiler warnings in test code.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2725. Modify distcp to avoid leaving partially copied files at
+    the destination after encountering an error. (Tsz Wo (Nicholas), SZE
+    via cdouglas)
+
+    HADOOP-2391. Cleanup job output directory before declaring a job as
+    SUCCESSFUL. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2808. Minor fix to FileUtil::copy to mind the overwrite
+    formal. (cdouglas)
+
+    HADOOP-2683. Moving UGI out of the RPC Server.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2814. Fix for NPE in datanode in unit test TestDataTransferProtocol.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2811. Dump of counters in job history does not add comma between
+    groups. (runping via omalley)
+
+    HADOOP-2735. Enables setting TMPDIR for tasks. 
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2843. Fix protections on map-side join classes to enable derivation.
+    (cdouglas via omalley)
+
+    HADOOP-2840. Fix gridmix scripts to correctly invoke the java sort through
+    the proper jar. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2769.  TestNNThroughputBenchmark should not use a fixed port for
+    the namenode http port. (omalley)
+
+    HADOOP-2852. Update gridmix benchmark to avoid an artificially long tail.
+    (cdouglas)
+
+    HADOOP-2894. Fix a problem to do with tasktrackers failing to connect to
+    JobTracker upon reinitialization. (Owen O'Malley via ddas).
+
+    HADOOP-2903.  Fix exception generated by Metrics while using pushMetric().
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2904.  Fix to RPC metrics to log the correct host name. 
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2918.  Improve error logging so that dfs write failures with
+    "No lease on file" can be diagnosed. (dhruba)
+
+    HADOOP-2923.  Add SequenceFileAsBinaryInputFormat, which was
+    missed in the commit for HADOOP-2603. (cdouglas via omalley)
+
+    HADOOP-2931. IOException thrown by DFSOutputStream had wrong stack
+    trace in some cases. (Michael Bieniosek via rangadi)
+
+    HADOOP-2883. Write failures and data corruptions on HDFS files.
+    The write timeout is back to what it was on 0.15 release. Also, the
+    datanode flushes the block file buffered output stream before
+    sending a positive ack for the packet back to the client. (dhruba)
+
+    HADOOP-2756. NPE in DFSClient while closing DFSOutputStreams 
+    under load. (rangadi)
+
+    HADOOP-2958. Fixed FileBench which broke due to HADOOP-2391 which performs
+    a check for existence of the output directory and a trivial bug in
+    GenericMRLoadGenerator where min/max word lengths were identical since
+    they were looking at the same config variables (Chris Douglas via
+    acmurthy) 
+
+    HADOOP-2915. Fixed FileSystem.CACHE so that a username is included
+    in the cache key. (Tsz Wo (Nicholas), SZE via nigel)
+
+    HADOOP-2813. TestDU unit test uses its own directory to run its 
+    sequence of tests.  (Mahadev Konar via dhruba)
+
+Release 0.16.0 - 2008-02-07
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1245.  Use the mapred.tasktracker.tasks.maximum value
+    configured on each tasktracker when allocating tasks, instead of
+    the value configured on the jobtracker. InterTrackerProtocol
+    version changed from 5 to 6. (Michael Bieniosek via omalley)
+
+    HADOOP-1843. Removed code from Configuration and JobConf deprecated by 
+    HADOOP-785 and a minor fix to Configuration.toString. Specifically the 
+    important change is that mapred-default.xml is no longer supported and 
+    Configuration no longer supports the notion of default/final resources.
+    (acmurthy) 
+
+    HADOOP-1302.  Remove deprecated abacus code from the contrib directory.
+    This also fixes a configuration bug in AggregateWordCount, so that the
+    job now works.  (enis)
+
+    HADOOP-2288.  Enhance FileSystem API to support access control.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2184.  RPC Support for user permissions and authentication.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2185.  RPC Server uses any available port if the specified
+    port is zero. Otherwise it uses the specified port. Also combines
+    the configuration attributes for the servers' bind address and
+    port from "x.x.x.x" and "y" to "x.x.x.x:y". 
+    Deprecated configuration variables:
+      dfs.info.bindAddress
+      dfs.info.port
+      dfs.datanode.bindAddress
+      dfs.datanode.port
+      dfs.datanode.info.bindAddress
+      dfs.datanode.info.port
+      dfs.secondary.info.bindAddress
+      dfs.secondary.info.port
+      mapred.job.tracker.info.bindAddress
+      mapred.job.tracker.info.port
+      mapred.task.tracker.report.bindAddress
+      tasktracker.http.bindAddress
+      tasktracker.http.port
+    New configuration variables (post HADOOP-2404):
+      dfs.secondary.http.address
+      dfs.datanode.address
+      dfs.datanode.http.address
+      dfs.http.address
+      mapred.job.tracker.http.address
+      mapred.task.tracker.report.address
+      mapred.task.tracker.http.address
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2401.  Only the current leaseholder can abandon a block for
+    a HDFS file.  ClientProtocol version changed from 20 to 21.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2381.  Support permission information in FileStatus. Client
+    Protocol version changed from 21 to 22.  (Raghu Angadi via dhruba)
+
+    HADOOP-2110. Block report processing creates fewer transient objects.
+    Datanode Protocol version changed from 10 to 11.  
+    (Sanjay Radia via dhruba)
+    
+    HADOOP-2567.  Add FileSystem#getHomeDirectory(), which returns the
+    user's home directory in a FileSystem as a fully-qualified path.
+    FileSystem#getWorkingDirectory() is also changed to return a
+    fully-qualified path, which can break applications that attempt
+    to, e.g., pass LocalFileSystem#getWorkingDir().toString() directly
+    to java.io methods that accept file names. (cutting)
+
+    HADOOP-2514.  Change trash feature to maintain a per-user trash
+    directory, named ".Trash" in the user's home directory.  The
+    "fs.trash.root" parameter is no longer used.  Full source paths
+    are also no longer reproduced within the trash.
+
+    HADOOP-2012. Periodic data verification on Datanodes.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1707. The DFSClient does not use a local disk file to cache
+    writes to a HDFS file. Changed Data Transfer Version from 7 to 8.
+    (dhruba)
+
+    HADOOP-2652. Fix permission issues for HftpFileSystem. This is an 
+    incompatible change since distcp may not be able to copy files 
+    from cluster A (compiled with this patch) to cluster B (compiled 
+    with previous versions). (Tsz Wo (Nicholas), SZE via dhruba)
+
+  NEW FEATURES
+
+    HADOOP-1857.  Ability to run a script when a task fails to capture stack
+    traces. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2299.  Definition of a login interface.  A simple implementation for
+    Unix users and groups. (Hairong Kuang via dhruba)
+
+    HADOOP-1652.  A utility to balance data among datanodes in a HDFS cluster.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2085.  A library to support map-side joins of consistently 
+    partitioned and sorted data sets. (Chris Douglas via omalley)
+
+    HADOOP-2336. Shell commands to modify file permissions. (rangadi)
+
+    HADOOP-1298. Implement file permissions for HDFS.
+    (Tsz Wo (Nicholas) & taton via cutting)
+
+    HADOOP-2447. HDFS can be configured to limit the total number of 
+    objects (inodes and blocks) in the file system. (dhruba)
+
+    HADOOP-2487. Added an option to get statuses for all submitted/run jobs.
+    This information can be used to develop tools for analysing jobs.
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-1873. Implement user permissions for Map/Reduce framework.
+    (Hairong Kuang via shv)
+
+    HADOOP-2532.  Add to MapFile a getClosest method that returns the key
+    that comes just before if the key is not present.  (stack via tomwhite)
+   
+    HADOOP-1883. Add versioning to Record I/O. (Vivek Ratan via ddas)
+
+    HADOOP-2603.  Add SequenceFileAsBinaryInputFormat, which reads
+    sequence files as BytesWritable/BytesWritable regardless of the
+    key and value types used to write the file. (cdouglas via omalley)
+
+    HADOOP-2367. Add ability to profile a subset of map/reduce tasks and fetch
+    the result to the local filesystem of the submitting application. Also
+    includes a general IntegerRanges extension to Configuration for setting
+    positive, ranged parameters. (Owen O'Malley via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-2045.  Change committer list on website to a table, so that
+    folks can list their organization, timezone, etc.  (cutting)
+
+    HADOOP-2058.  Facilitate creating new datanodes dynamically in
+    MiniDFSCluster. (Hairong Kuang via dhruba)
+
+    HADOOP-1855.  fsck verifies block placement policies and reports
+    violations.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-1604.  A system administrator can finalize namenode upgrades
+    without running the cluster. (Konstantin Shvachko via dhruba)
+
+    HADOOP-1839.  Link-ify the Pending/Running/Complete/Killed grid in
+    jobdetails.jsp to help quickly narrow down and see categorized TIPs' 
+    details via jobtasks.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-1210.  Log counters in job history. (Owen O'Malley via ddas)
+
+    HADOOP-1912. Datanode has two new commands COPY and REPLACE. These are
+    needed for supporting data rebalance.  (Hairong Kuang via dhruba)
+
+    HADOOP-2086. This patch adds the ability to add dependencies to a job
+    (run via JobControl) after construction.  (Adrian Woodhead via ddas)
+
+    HADOOP-1185. Support changing the logging level of a server without 
+    restarting the server.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2134.  Remove developer-centric requirements from overview.html and
+    keep it end-user focussed, specifically sections related to subversion and
+    building Hadoop. (Jim Kellerman via acmurthy)
+
+    HADOOP-1989. Support simulated DataNodes. This helps creating large virtual
+    clusters for testing purposes.  (Sanjay Radia via dhruba)
+    
+    HADOOP-1274. Support different number of mappers and reducers per
+    TaskTracker to  allow administrators to better configure and utilize
+    heterogeneous clusters.
+    Configuration changes to hadoop-default.xml:
+      add mapred.tasktracker.map.tasks.maximum (default value of 2)
+      add mapred.tasktracker.reduce.tasks.maximum (default value of 2)
+      remove mapred.tasktracker.tasks.maximum (deprecated for 0.16.0)
+    (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2104. Adds a description to the ant targets. This makes the 
+    output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
+
+    HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
+    application versus trivial java application. (omalley via acmurthy)
+
+    HADOOP-2113. A new shell command "dfs -text" to view the contents of
+    a gziped or SequenceFile. (Chris Douglas via dhruba)
+
+    HADOOP-2207.  Add a "package" target for contrib modules that
+    permits each to determine what files are copied into release
+    builds.  (stack via cutting)
+
+    HADOOP-1984. Makes the backoff for failed fetches exponential. 
+    Earlier, it was a random backoff from an interval. 
+    (Amar Kamat via ddas)
+
+    HADOOP-1327.  Include website documentation for streaming. (Rob Weltman
+    via omalley)
+
+    HADOOP-2000.  Rewrite NNBench to measure namenode performance accurately.
+    It now uses the map-reduce framework for load generation.
+    (Mukund Madhugiri via dhruba)
+
+    HADOOP-2248. Speeds up the framework w.r.t Counters. Also has API
+    updates to the Counters part. (Owen O'Malley via ddas)
+
+    HADOOP-2326. The initial block report at Datanode startup time has
+    a random backoff period.  (Sanjay Radia via dhruba)
+
+    HADOOP-2432. HDFS includes the name of the file while throwing 
+    "File does not exist"  exception. (Jim Kellerman via dhruba)
+
+    HADOOP-2457. Added a 'forrest.home' property to the 'docs' target in
+    build.xml. (acmurthy) 
+
+    HADOOP-2149.  A new benchmark for three name-node operation: file create, 
+    open, and block report, to evaluate the name-node performance 
+    for optimizations or new features. (Konstantin Shvachko via shv)
+
+    HADOOP-2466. Change FileInputFormat.computeSplitSize to a protected
+    non-static method to allow sub-classes to provide alternate
+    implementations. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2425. Change TextOutputFormat to handle Text specifically for better
+    performance. Make NullWritable implement Comparable. Make TextOutputFormat
+    treat NullWritable like null. (omalley)
+
+    HADOOP-1719. Improves the utilization of shuffle copier threads.
+    (Amar Kamat via ddas)
+ 
+    HADOOP-2390. Added documentation for user-controls for intermediate
+    map-outputs & final job-outputs and native-hadoop libraries. (acmurthy) 
+ 
+    HADOOP-1660. Add the cwd of the map/reduce task to the java.library.path
+    of the child-jvm to support loading of native libraries distributed via
+    the DistributedCache. (acmurthy)
+ 
+    HADOOP-2285. Speeds up TextInputFormat. Also includes updates to the
+    Text API. (Owen O'Malley via cdouglas)
+
+    HADOOP-2233. Adds a generic load generator for modeling MR jobs. (cdouglas)
+
+    HADOOP-2369. Adds a set of scripts for simulating a mix of user map/reduce
+    workloads. (Runping Qi via cdouglas)
+
+    HADOOP-2547. Removes use of a 'magic number' in build.xml. 
+    (Hrishikesh via nigel)
+
+    HADOOP-2268. Fix org.apache.hadoop.mapred.jobcontrol classes to use the
+    List/Map interfaces rather than concrete ArrayList/HashMap classes
+    internally. (Adrian Woodhead via acmurthy)
+
+    HADOOP-2406. Add a benchmark for measuring read/write performance through
+    the InputFormat interface, particularly with compression. (cdouglas)
+
+    HADOOP-2131. Allow finer-grained control over speculative-execution. Now
+    users can set it for maps and reduces independently.
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.speculative.execution
+      add mapred.map.tasks.speculative.execution
+      add mapred.reduce.tasks.speculative.execution
+    (Amareshwari Sri Ramadasu via acmurthy) 
+      
+    HADOOP-1965. Interleave sort/spill in the map-task along with calls to the
+    Mapper.map method. This is done by splitting the 'io.sort.mb' buffer into
+    two and using one half for collecting map-outputs and the other half for
+    sort/spill. (Amar Kamat via acmurthy)
+    
+    HADOOP-2464. Unit tests for chmod, chown, and chgrp using DFS.
+    (Raghu Angadi)
+
+    HADOOP-1876. Persist statuses of completed jobs in HDFS so that the
+    JobClient can query and get information about decommissioned jobs and also
+    across JobTracker restarts.
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.tracker.persist.jobstatus.active (default value of false)
+      add mapred.job.tracker.persist.jobstatus.hours (default value of 0)
+      add mapred.job.tracker.persist.jobstatus.dir (default value of
+                                                    /jobtracker/jobsInfo)
+    (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2077. Added version and build information to STARTUP_MSG for all
+    hadoop daemons to aid error-reporting, debugging etc. (acmurthy) 
+
+    HADOOP-2398. Additional instrumentation for NameNode and RPC server.
+    Add support for accessing instrumentation statistics via JMX.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2449. A return of the non-MR version of NNBench.
+    (Sanjay Radia via shv)
+
+    HADOOP-1989. Remove 'datanodecluster' command from bin/hadoop.
+    (Sanjay Radia via shv)
+
+    HADOOP-1742. Improve JavaDoc documentation for ClientProtocol, DFSClient,
+    and FSNamesystem. (Konstantin Shvachko)
+
+    HADOOP-2298. Add Ant target for a binary-only distribution.
+    (Hrishikesh via nigel)
+
+    HADOOP-2509. Add Ant target for Rat report (Apache license header
+    reports).  (Hrishikesh via nigel)
+
+    HADOOP-2469.  WritableUtils.clone should take a Configuration
+    instead of a JobConf. (stack via omalley)
+
+    HADOOP-2659. Introduce superuser permissions for admin operations.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2596. Added a SequenceFile.createWriter api which allows the user
+    to specify the blocksize, replication factor and the buffersize to be
+    used for the underlying HDFS file. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2431. Test HDFS File Permissions. (Hairong Kuang via shv)
+
+    HADOOP-2232. Add an option to disable Nagle's algorithm in the IPC stack.
+    (Clint Morgan via cdouglas)
+
+    HADOOP-2342. Created a micro-benchmark for measuring 
+    local-file versus hdfs reads. (Owen O'Malley via nigel)
+
+    HADOOP-2529. First version of HDFS User Guide. (Raghu Angadi)
+
+    HADOOP-2690. Add jar-test target to build.xml, separating compilation
+    and packaging of the test classes. (Enis Soztutar via cdouglas)
+
+  OPTIMIZATIONS
+
+    HADOOP-1898.  Release the lock protecting the last time of the last stack
+    dump while the dump is happening. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-1900. Makes the heartbeat and task event queries interval 
+    dependent on the cluster size.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2208. Counter update frequency (from TaskTracker to JobTracker) is 
+    capped at 1 minute.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2284. Reduce the number of progress updates during the sorting in 
+    the map task. (Amar Kamat via ddas)
+
+  BUG FIXES
+
+    HADOOP-2583.  Fixes a bug in the Eclipse plug-in UI to edit locations.
+    Plug-in version is now synchronized with Hadoop version.
+
+    HADOOP-2100.  Remove faulty check for existence of $HADOOP_PID_DIR and let
+    'mkdir -p' check & create it. (Michael Bieniosek via acmurthy)
+
+    HADOOP-1642.  Ensure jobids generated by LocalJobRunner are unique to
+    avoid collisions and hence job-failures. (Doug Cutting via acmurthy)
+
+    HADOOP-2096.  Close open file-descriptors held by streams while localizing
+    job.xml in the JobTracker and while displaying it on the webui in 
+    jobconf.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-2098.  Log start & completion of empty jobs to JobHistory, which
+    also ensures that we close the file-descriptor of the job's history log 
+    opened during job-submission. (Amar Kamat via acmurthy)
+
+    HADOOP-2112.  Adding back changes to build.xml lost while reverting
+    HADOOP-1622 i.e. http://svn.apache.org/viewvc?view=rev&revision=588771.
+    (acmurthy)
+
+    HADOOP-2089.  Fixes the command line argument handling to handle multiple
+    -cacheArchive in Hadoop streaming.  (Lohit Vijayarenu via ddas)
+
+    HADOOP-2071.  Fix StreamXmlRecordReader to use a BufferedInputStream
+    wrapped over the DFSInputStream since mark/reset aren't supported by
+    DFSInputStream anymore. (Lohit Vijayarenu via acmurthy)
+
+    HADOOP-1348.  Allow XML comments inside configuration files. 
+    (Rajagopal Natarajan and Enis Soztutar via enis)
+
+    HADOOP-1952.  Improve handling of invalid, user-specified classes while
+    configuring streaming jobs such as combiner, input/output formats etc.
+    Now invalid options are caught, logged and jobs are failed early. (Lohit
+    Vijayarenu via acmurthy)
+
+    HADOOP-2151. FileSystem.globPaths validates the list of Paths that
+    it returns.  (Lohit Vijayarenu via dhruba)
+
+    HADOOP-2121. Cleanup DFSOutputStream when the stream encountered errors
+    when Datanodes became full.  (Raghu Angadi via dhruba)
+
+    HADOOP-1130. The FileSystem.closeAll() method closes all existing
+    DFSClients.  (Chris Douglas via dhruba)
+
+    HADOOP-2204. DFSTestUtil.waitReplication was not waiting for all replicas
+    to get created, thus causing unit test failure.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2078. A zero-size file may have no blocks associated with it.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2212. ChecksumFileSystem.getSumBufferSize might throw 
+    java.lang.ArithmeticException. The fix is to initialize bytesPerChecksum
+    to 0.  (Michael Bieniosek via ddas)
+
+    HADOOP-2216.  Fix jobtasks.jsp to ensure that it first collects the
+    taskids which satisfy the filtering criteria and then use that list to
+    print out only the required task-reports, previously it was oblivious to
+    the filtering and hence used the wrong index into the array of task-reports. 
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2272.  Fix findbugs target to reflect changes made to the location
+    of the streaming jar file by HADOOP-2207.  (Adrian Woodhead via nigel)
+
+    HADOOP-2244.  Fixes the MapWritable.readFields to clear the instance 
+    field variable every time readFields is called. (Michael Stack via ddas).
+
+    HADOOP-2245.  Fixes LocalJobRunner to include a jobId in the mapId. Also,  
+    adds a testcase for JobControl. (Adrian Woodhead via ddas).
+
+    HADOOP-2275. Fix erroneous detection of corrupted file when namenode 
+    fails to allocate any datanodes for newly allocated block.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-2256. Fix a bug in the namenode that could cause it to encounter
+    an infinite loop while deleting excess replicas that were created by 
+    block rebalancing.  (Hairong Kuang via dhruba)
+
+    HADOOP-2209. SecondaryNamenode process exits if it encounters exceptions 
+    that it cannot handle.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-2314. Prevent TestBlockReplacement from occasionally getting
+    into an infinite loop.  (Hairong Kuang via dhruba)
+
+    HADOOP-2300. This fixes a bug where mapred.tasktracker.tasks.maximum
+    would be ignored even if it was set in hadoop-site.xml.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2349.  Improve code layout in file system transaction logging code.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2368.  Fix unit tests on Windows.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2363.  This fix allows running multiple instances of the unit test
+    in parallel. The bug was introduced in HADOOP-2185 that changed
+    port-rolling behaviour.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2271.  Fix chmod task to be non-parallel. (Adrian Woodhead via
+    omalley)
+
+    HADOOP-2313.  Fail the build if building libhdfs fails. (nigel via omalley)
+
+    HADOOP-2359.  Remove warning for interrupted exception when closing down
+    minidfs. (dhruba via omalley)
+
+    HADOOP-1841. Prevent slow clients from consuming threads in the NameNode. 
+    (dhruba)
+    
+    HADOOP-2323. JobTracker.close() should not print stack traces for
+    normal exit.  (jimk via cutting)
+
+    HADOOP-2376. Prevents sort example from overriding the number of maps.
+    (Owen O'Malley via ddas)
+
+    HADOOP-2434. FSDatasetInterface read interface causes HDFS reads to occur 
+    in 1 byte chunks, causing performance degradation.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2459. Fix package target so that src/docs/build files are not
+    included in the release.  (nigel)
+
+    HADOOP-2215.  Fix documentation in cluster_setup.html &
+    mapred_tutorial.html reflect that mapred.tasktracker.tasks.maximum has
+    been superseded by mapred.tasktracker.{map|reduce}.tasks.maximum.
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-2352. Remove AC_CHECK_LIB for libz and liblzo to ensure that
+    libhadoop.so doesn't have a dependency on them. (acmurthy) 
+
+    HADOOP-2453. Fix the configuration for wordcount-simple example in Hadoop 
+    Pipes which currently produces an XML parsing error. (Amareshwari Sri
+    Ramadasu via acmurthy)
+
+    HADOOP-2476. Unit test failure while reading permission bits of local
+    file system (on Windows) fixed.  (Raghu Angadi via dhruba)
+
+    HADOOP-2247.  Fine-tune the strategies for killing mappers and reducers
+    due to failures while fetching map-outputs. Now the map-completion times
+    and number of currently running reduces are taken into account by the
+    JobTracker before  killing the mappers, while the progress made by the
+    reducer and the number of fetch-failures vis-a-vis total number of
+    fetch-attempts are taken into account before the reducer kills itself.
+    (Amar Kamat via acmurthy)
+    
+    HADOOP-2452. Fix eclipse plug-in build.xml to refer to the right
+    location where hadoop-*-core.jar is generated. (taton)
+
+    HADOOP-2492. Additional debugging in the rpc server to better 
+    diagnose ConcurrentModificationException. (dhruba)
+
+    HADOOP-2344. Enhance the utility for executing shell commands to read the
+    stdout/stderr streams while waiting for the command to finish (to free up
+    the buffers). Also, this patch throws away stderr of the DF utility.
+    @deprecated 
+      org.apache.hadoop.fs.ShellCommand for org.apache.hadoop.util.Shell
+      org.apache.hadoop.util.ShellUtil for 
+        org.apache.hadoop.util.Shell.ShellCommandExecutor
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2511. Fix a javadoc warning in org.apache.hadoop.util.Shell
+    introduced by HADOOP-2344. (acmurthy) 
+
+    HADOOP-2442. Fix TestLocalFileSystemPermission.testLocalFSsetOwner
+    to work on more platforms. (Raghu Angadi via nigel)
+
+    HADOOP-2488. Fix a regression in random read performance.
+    (Michael Stack via rangadi)
+
+    HADOOP-2523. Fix TestDFSShell.testFilePermissions on Windows.
+    (Raghu Angadi via nigel)
+
+    HADOOP-2535. Removed support for deprecated mapred.child.heap.size and
+    fixed some indentation issues in TaskRunner. (acmurthy)
+    Configuration changes to hadoop-default.xml:
+      remove mapred.child.heap.size
+
+    HADOOP-2512. Fix error stream handling in Shell. Use exit code to
+    detect shell command errors in RawLocalFileSystem. (Raghu Angadi)
+
+    HADOOP-2446. Fixes TestHDFSServerPorts and TestMRServerPorts so they
+    do not rely on statically configured ports and cleanup better. (nigel)
+
+    HADOOP-2537. Make build process compatible with Ant 1.7.0.
+    (Hrishikesh via nigel)
+
+    HADOOP-1281. Ensure running tasks of completed map TIPs (e.g. speculative
+    tasks) are killed as soon as the TIP completed. (acmurthy)
+
+    HADOOP-2571. Suppress a spurious warning in test code. (cdouglas)
+
+    HADOOP-2481. NNBench reports its progress periodically.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2601. Start name-node on a free port for TestNNThroughputBenchmark.
+    (Konstantin Shvachko)
+
+    HADOOP-2494.  Set +x on contrib/*/bin/* in packaged tar bundle.
+    (stack via tomwhite)
+
+    HADOOP-2605. Remove bogus leading slash in task-tracker report bindAddress.
+    (Konstantin Shvachko)
+    
+    HADOOP-2620. Trivial. 'bin/hadoop fs -help' did not list chmod, chown, and
+    chgrp. (Raghu Angadi)
+
+    HADOOP-2614. The DFS WebUI accesses are configured to be from the user
+    specified by dfs.web.ugi.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2543. Implement a "no-permission-checking" mode for smooth
+    upgrade from a pre-0.16 install of HDFS.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-290. A DataNode log message now prints the target of a replication
+    request correctly. (dhruba)
+
+    HADOOP-2538. Redirect to a warning, if plaintext parameter is true but 
+    the filter parameter is not given in TaskLogServlet.  
+    (Michael Bieniosek via enis)
+
+    HADOOP-2582. Prevent 'bin/hadoop fs -copyToLocal' from creating
+    zero-length files when the src does not exist.
+    (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-2189. Incrementing user counters should count as progress. (ddas)
+
+    HADOOP-2649. The NameNode periodically computes replication work for
+    the datanodes. The periodicity of this computation is now configurable.
+    (dhruba)
+
+    HADOOP-2549. Correct disk size computation so that data-nodes can switch
+    to other local drives if the current one is full. (Hairong Kuang via shv)
+
+    HADOOP-2633. Fsck should call name-node methods directly rather than 
+    through rpc. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2687. Modify a few log messages generated by the dfs client to be
+    logged only at INFO level. (stack via dhruba)
+
+    HADOOP-2402. Fix BlockCompressorStream to ensure it buffers data before
+    sending it down to the compressor so that each write call doesn't
+    compress. (Chris Douglas via acmurthy) 
+
+    HADOOP-2645. The Metrics initialization code does not throw
+    exceptions when servers are restarted by MiniDFSCluster.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2691. Fix a race condition that was causing the DFSClient
+    to erroneously remove a good datanode from a pipeline that actually
+    had another datanode that was bad. (dhruba)
+
+    HADOOP-1195. All code in FSNamesystem checks the return value
+    of getDataNode for null before using it. (dhruba)
+
+    HADOOP-2640. Fix a bug in MultiFileSplitInputFormat that was always
+    returning 1 split in some circumstances. (Enis Soztutar via nigel)
+
+    HADOOP-2626. Fix paths with special characters to work correctly
+    with the local filesystem.  (Thomas Friol via cutting)
+
+    HADOOP-2646. Fix SortValidator to work with fully-qualified 
+    working directories.  (Arun C Murthy via nigel)
+
+    HADOOP-2092. Added a ping mechanism to the pipes' task to periodically
+    check if the parent Java task is running, and exit if the parent isn't
+    alive and responding. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2714. TestDecommission failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2576. Namenode performance degradation over time triggered by
+    large heartbeat interval. (Raghu Angadi)
+
+    HADOOP-2713. TestDatanodeDeath failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2639. Fixes a problem to do with incorrect maintenance of values 
+    for runningMapTasks/runningReduceTasks. (Amar Kamat and Arun Murthy 
+    via ddas)
+
+    HADOOP-2723. Fixed the check for checking whether to do user task
+    profiling. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-2734. Link forrest docs to new http://hadoop.apache.org
+    (Doug Cutting via nigel)
+
+    HADOOP-2641. Added Apache license headers to 95 files. (nigel)
+
+    HADOOP-2732. Fix bug in path globbing.  (Hairong Kuang via nigel)
+
+    HADOOP-2404. Fix backwards compatibility with hadoop-0.15 configuration
+    files that was broken by HADOOP-2185. (omalley)
+
+    HADOOP-2755. Fix fsck performance degradation because of permissions 
+    issue.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2768. Fix performance regression caused by HADOOP-1707.
+    (dhruba borthakur via nigel)
+
+    HADOOP-3108. Fix NPE in setPermission and setOwner. (shv)
+
+Release 0.15.3 - 2008-01-18
+
+  BUG FIXES
+
+    HADOOP-2562. globPaths supports {ab,cd}.  (Hairong Kuang via dhruba)
+
+    HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba)
+
+    HADOOP-2570. "work" directory created unconditionally, and symlinks
+    created from the task cwds.
+
+    HADOOP-2574. Fixed mapred_tutorial.xml to correct minor errors with the
+    WordCount examples. (acmurthy) 
+
+Release 0.15.2 - 2008-01-02
+
+  BUG FIXES
+
+    HADOOP-2246.  Moved the changelog for HADOOP-1851 from the NEW FEATURES 
+    section to the INCOMPATIBLE CHANGES section. (acmurthy)
+
+    HADOOP-2238.  Fix TaskGraphServlet so that it sets the content type of 
+    the response appropriately.  (Paul Saab via enis)
+
+    HADOOP-2129.  Fix so that distcp works correctly when source is
+    HDFS but not the default filesystem.  HDFS paths returned by the
+    listStatus() method are now fully-qualified.  (cutting)
+
+    HADOOP-2378.  Fixes a problem where the last task completion event would
+    get created after the job completes. (Alejandro Abdelnur via ddas)
+
+    HADOOP-2228.  Checks whether a job with a certain jobId is already running
+    and then tries to create the JobInProgress object. 
+    (Johan Oskarsson via ddas)
+
+    HADOOP-2422.  dfs -cat multiple files fail with 'Unable to write to 
+    output stream'.  (Raghu Angadi via dhruba)
+
+    HADOOP-2460.  When the namenode encounters ioerrors on writing a
+    transaction log, it stops writing new transactions to that one.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2227.  Use the LocalDirAllocator uniformly for handling all of the
+    temporary storage required for a given task. It also implies that
+    mapred.local.dir.minspacestart is handled by checking if there is enough
+    free-space on any one of the available disks. (Amareshwari Sri Ramadasu
+    via acmurthy)
+
+    HADOOP-2437.  Fix the LocalDirAllocator to choose the seed for the
+    round-robin disk selections randomly. This helps in spreading data across
+    multiple partitions much better. (acmurthy)
+
+    HADOOP-2486. When the list of files from the InMemoryFileSystem is obtained
+    for merging, this patch will ensure that only those files whose checksums
+    have also got created (renamed) are returned. (ddas)
+
+    HADOOP-2456. Hardcode English locale to prevent NumberFormatException
+    from occurring when starting the NameNode with certain locales.
+    (Matthias Friedrich via nigel)
+
+  IMPROVEMENTS
+
+    HADOOP-2160.  Remove project-level, non-user documentation from
+    releases, since it's now maintained in a separate tree.  (cutting)
+
+    HADOOP-1327.  Add user documentation for streaming.  (cutting)
+
+    HADOOP-2382.  Add hadoop-default.html to subversion. (cutting)
+
+    HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead
+    of FileSystem.listPaths. This reduces the number of RPC calls on the
+    namenode, thereby improving scalability.  (Christian Kunz via dhruba)
+
+Release 0.15.1 - 2007-11-27
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-713.  Reduce CPU usage on namenode while listing directories.
+    FileSystem.listPaths does not return the size of the entire subtree.
+    Introduced a new API ClientProtocol.getContentLength that returns the
+    size of the subtree. (Dhruba Borthakur via dhruba)
+
+  IMPROVEMENTS
+
+    HADOOP-1917.  Addition of guides/tutorial for better overall
+    documentation for Hadoop. Specifically: 
+    * quickstart.html is targeted towards first-time users and helps them 
+      setup a single-node cluster and play with Hadoop. 
+    * cluster_setup.html helps admins to configure and setup non-trivial
+      hadoop clusters.
+    * mapred_tutorial.html is a comprehensive Map-Reduce tutorial. 
+    (acmurthy) 
+
+  BUG FIXES
+
+    HADOOP-2174.  Removed the unnecessary Reporter.setStatus call from
+    FSCopyFilesMapper.close which led to a NPE since the reporter isn't valid
+    in the close method. (Chris Douglas via acmurthy) 
+
+    HADOOP-2172.  Restore performance of random access to local files
+    by caching positions of local input streams, avoiding a system
+    call. (cutting)
+
+    HADOOP-2205.  Regenerate the Hadoop website since some of the changes made
+    by HADOOP-1917 weren't correctly copied over to the trunk/docs directory. 
+    Also fixed a couple of minor typos and broken links. (acmurthy)
+
+Release 0.15.0 - 2007-11-02
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1708.  Make files appear in namespace as soon as they are
+    created.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-999.  A HDFS Client immediately informs the NameNode of a new
+    file creation.  ClientProtocol version changed from 14 to 15.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-932.  File locking interfaces and implementations (that were
+    earlier deprecated) are removed.  Client Protocol version changed 
+    from 15 to 16.  (Raghu Angadi via dhruba)
+
+    HADOOP-1621.  FileStatus is now a concrete class and FileSystem.listPaths
+    is deprecated and replaced with listStatus. (Chris Douglas via omalley)
+
+    HADOOP-1656.  The blockSize of a file is stored persistently in the file
+    inode. (Dhruba Borthakur via dhruba)
+
+    HADOOP-1838.  The blocksize of files created with an earlier release is
+    set to the default block size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-785.  Add support for 'final' Configuration parameters,
+    removing support for 'mapred-default.xml', and changing
+    'hadoop-site.xml' to not override other files.  Now folks should
+    generally use 'hadoop-site.xml' for all configurations.  Values
+    with a 'final' tag may not be overridden by subsequently loaded
+    configuration files, e.g., by jobs.  (Arun C. Murthy via cutting)
+
+    HADOOP-1846. DatanodeReport in ClientProtocol can report live 
+    datanodes, dead datanodes or all datanodes. Client Protocol version
+    changed from 17 to 18.  (Hairong Kuang via dhruba)
+
+    HADOOP-1851.  Permit specification of map output compression type
+    and codec, independent of the final output's compression
+    parameters.  (Arun C Murthy via cutting)
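+
+    For illustration, a minimal sketch of enabling map output compression
+    independently of the job output, assuming the JobConf setters of this
+    era and the gzip codec:
+
+      import org.apache.hadoop.io.compress.GzipCodec;
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class MapOutputCompressionExample {
+        public static void main(String[] args) {
+          JobConf conf = new JobConf();
+          conf.setCompressMapOutput(true);                    // only the map outputs
+          conf.setMapOutputCompressorClass(GzipCodec.class);  // codec for map outputs
+        }
+      }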
+
+    HADOOP-1819.  Jobtracker cleanups, including binding ports before
+    clearing state directories, so that inadvertently starting a
+    second jobtracker doesn't trash one that's already running. Removed
+    method JobTracker.getTracker() because the static variable, which
+    stored the value caused initialization problems.
+    (omalley via cutting)
+
+  NEW FEATURES
+
+    HADOOP-89.  A client can access file data even before the creator
+    has closed the file. Introduce a new command "tail" from dfs shell.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1636.  Allow configuration of the number of jobs kept in
+    memory by the JobTracker.  (Michael Bieniosek via omalley)
+
+    HADOOP-1667.  Reorganize CHANGES.txt into sections to make it
+    easier to read.  Also remove numbering, to make merging easier.
+    (cutting)
+
+    HADOOP-1610.  Add metrics for failed tasks.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1767.  Add "bin/hadoop job -list" sub-command. (taton via cutting)
+
+    HADOOP-1351.  Add "bin/hadoop job [-fail-task|-kill-task]" sub-commands
+    to terminate a particular task-attempt. (Enis Soztutar via acmurthy)
+
+    HADOOP-1880. SleepJob : An example job that sleeps at each map and 
+    reduce task. (enis)
+
+    HADOOP-1809. Add a link in web site to #hadoop IRC channel. (enis)
+
+    HADOOP-1894. Add percentage graphs and mapred task completion graphs 
+    to Web User Interface. Users not using Firefox may install a plugin to 
+    their browsers to see svg graphics. (enis)
+
+    HADOOP-1914. Introduce a new NamenodeProtocol to allow secondary 
+    namenodes and rebalancing processes to communicate with a primary 
+    namenode.  (Hairong Kuang via dhruba)
+
+    HADOOP-1963.  Add a FileSystem implementation for the Kosmos
+    Filesystem (KFS).  (Sriram Rao via cutting)
+
+    HADOOP-1822.  Allow the specialization and configuration of socket
+    factories. Provide a StandardSocketFactory, and a SocksSocketFactory to
+    allow the use of SOCKS proxies. (taton).
+
+    HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2566. Add globStatus method to the FileSystem interface
+    and deprecate globPath and listPath. (Hairong Kuang via hairong)
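+
+    For illustration, a minimal sketch combining the new globStatus call with
+    the "{ }" wildcard syntax from HADOOP-1968; the paths are illustrative:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileStatus;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+
+      public class GlobExample {
+        public static void main(String[] args) throws Exception {
+          FileSystem fs = FileSystem.get(new Configuration());
+          // Matches files under either year directory in a single call.
+          FileStatus[] matches = fs.globStatus(new Path("/logs/{2007,2008}/*"));
+          for (FileStatus status : matches) {
+            System.out.println(status.getPath());
+          }
+        }
+      }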
+
+  OPTIMIZATIONS
+
+    HADOOP-1910.  Reduce the number of RPCs that DistributedFileSystem.create()
+    makes to the namenode. (Raghu Angadi via dhruba)
+
+    HADOOP-1565.  Reduce memory usage of NameNode by replacing 
+    TreeMap in HDFS Namespace with ArrayList.  
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1743.  Change DFS INode from a nested class to standalone
+    class, with specialized subclasses for directories and files, to
+    save memory on the namenode.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1759.  Change file name in INode from String to byte[],
+    saving memory on the namenode. (Konstantin Shvachko via cutting)
+
+    HADOOP-1766.  Save memory in namenode by having BlockInfo extend
+    Block, and replace many uses of Block with BlockInfo.
+    (Konstantin Shvachko via cutting)
+
+    HADOOP-1687.  Save memory in namenode by optimizing BlockMap
+    representation.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1774. Remove use of INode.parent in Block CRC upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1788.  Increase the buffer size on the Pipes command socket.
+    (Amareshwari Sri Ramadasu and Christian Kunz via omalley)
+
+  BUG FIXES
+
+    HADOOP-1946.  The Datanode code does not need to invoke du on
+    every heartbeat.  (Hairong Kuang via dhruba)
+
+    HADOOP-1935. Fix a NullPointerException in internalReleaseCreate.
+    (Dhruba Borthakur)
+
+    HADOOP-1933. The nodes listed in include and exclude files 
+    are always listed in the datanode report.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1953. The job tracker should wait between calls to try and delete
+    the system directory. (Owen O'Malley via devaraj)
+
+    HADOOP-1932. TestFileCreation fails with message saying filestatus.dat
+    is of incorrect size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1573. Support for 0 reducers in PIPES. 
+    (Owen O'Malley via devaraj)
+
+    HADOOP-1500. Fix typographical errors in the DFS WebUI.
+    (Nigel Daley via dhruba)
+
+    HADOOP-1076. Periodic checkpoint can continue even if an earlier
+    checkpoint encountered an error.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1887. The Namenode encounters an ArrayIndexOutOfBoundsException
+    while listing a directory that had a file that was
+    being actively written to.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1904. The Namenode encounters an exception because the
+    list of blocks per datanode-descriptor was corrupted.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1762. The Namenode fsimage does not contain a list of
+    Datanodes.  (Raghu Angadi via dhruba)
+
+    HADOOP-1890. Removed debugging prints introduced by HADOOP-1774.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1763. Too many lost task trackers on large clusters due to
+    insufficient number of RPC handler threads on the JobTracker.
+    (Devaraj Das)
+
+    HADOOP-1463.  HDFS report correct usage statistics for disk space
+    used by HDFS.  (Hairong Kuang via dhruba)
+
+    HADOOP-1692.  In DFS ant task, don't cache the Configuration.
+    (Chris Douglas via cutting)
+
+    HADOOP-1726.  Remove lib/jetty-ext/ant.jar. (omalley)
+
+    HADOOP-1772.  Fix hadoop-daemon.sh script to get correct hostname
+    under Cygwin.  (Tsz Wo (Nicholas), SZE via cutting)
+
+    HADOOP-1749.  Change TestDFSUpgrade to sort files, fixing sporadic
+    test failures.  (Enis Soztutar via cutting)
+
+    HADOOP-1748.  Fix tasktracker to be able to launch tasks when log
+    directory is relative.  (omalley via cutting)
+
+    HADOOP-1775.  Fix a NullPointerException and an
+    IllegalArgumentException in MapWritable.
+    (Jim Kellerman via cutting)
+
+    HADOOP-1795.  Fix so that jobs can generate output file names with
+    special characters.  (Frédéric Bertin via cutting)
+
+    HADOOP-1810.  Fix incorrect value type in MRBench (SmallJobs)
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1806.  Fix ant task to compile again, also fix default
+    builds to compile ant tasks.  (Chris Douglas via cutting)
+
+    HADOOP-1758.  Fix escape processing in librecordio to not be
+    quadratic.  (Vivek Ratan via cutting)
+
+    HADOOP-1817.  Fix MultiFileSplit to read and write the split
+    length, so that it is not always zero in map tasks.
+    (Thomas Friol via cutting)
+
+    HADOOP-1853.  Fix contrib/streaming to accept multiple -cacheFile
+    options.  (Prachi Gupta via cutting)
+
+    HADOOP-1818. Fix MultiFileInputFormat so that it does not return 
+    empty splits when numPaths < numSplits.  (Thomas Friol via enis)
+
+    HADOOP-1840. Fix race condition which leads to task's diagnostic
+    messages getting lost. (acmurthy) 
+
+    HADOOP-1885. Fix race condition in MiniDFSCluster shutdown.
+    (Chris Douglas via nigel)
+
+    HADOOP-1889.  Fix path in EC2 scripts for building your own AMI.
+    (tomwhite)
+
+    HADOOP-1892.  Fix a NullPointerException in the JobTracker when
+    trying to fetch a task's diagnostic messages from the JobClient.
+    (Amar Kamat via acmurthy)
+
+    HADOOP-1897.  Completely remove about.html page from the web site.
+    (enis)
+
+    HADOOP-1907.  Fix null pointer exception when getting task diagnostics
+    in JobClient. (Christian Kunz via omalley)
+
+    HADOOP-1882.  Remove spurious asterisks from decimal number displays.
+    (Raghu Angadi via cutting)
+
+    HADOOP-1783.  Make S3 FileSystem return Paths fully-qualified with
+    scheme and host.  (tomwhite)
+
+    HADOOP-1925.  Make pipes' autoconf script look for libsocket and libnsl, so
+    that it can compile under Solaris. (omalley)
+
+    HADOOP-1940.  TestDFSUpgradeFromImage must shut down its MiniDFSCluster.
+    (Chris Douglas via nigel)
+
+    HADOOP-1930.  Fix the blame for failed fetches on the right host. (Arun C.
+    Murthy via omalley)
+
+    HADOOP-1934.  Fix the platform name on Mac to use underscores rather than
+    spaces. (omalley)
+
+    HADOOP-1959.  Use "/" instead of File.separator in the StatusHttpServer.
+    (jimk via omalley)
+
+    HADOOP-1626.  Improve dfsadmin help messages.
+    (Lohit Vijayarenu via dhruba)
+
+    HADOOP-1695.  The SecondaryNamenode waits for the Primary NameNode to
+    start up.  (Dhruba Borthakur)
+
+    HADOOP-1983.  Have Pipes flush the command socket when progress is sent
+    to prevent timeouts during long computations. (omalley)
+
+    HADOOP-1875.  Non-existent directories or read-only directories are
+    filtered from dfs.client.buffer.dir.  (Hairong Kuang via dhruba)
+
+    HADOOP-1992.  Fix the performance degradation in the sort validator. 
+    (acmurthy via omalley)
+
+    HADOOP-1874.  Move task-outputs' promotion/discard to a separate thread
+    distinct from the main heartbeat-processing thread. The main upside being 
+    that we do not lock-up the JobTracker during HDFS operations, which
+    otherwise may lead to lost tasktrackers if the NameNode is unresponsive.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2026. Namenode prints out one log line for "Number of transactions"
+    at most once every minute. (Dhruba Borthakur)
+
+    HADOOP-2022.  Ensure that status information for successful tasks is correctly
+    recorded at the JobTracker, so that, for example, one may view correct
+    information via taskdetails.jsp. This bug was introduced by HADOOP-1874.
+    (Amar Kamat via acmurthy)
+                                
+    HADOOP-2031.  Correctly maintain the taskid which takes the TIP to 
+    completion, failing which the case of lost tasktrackers isn't handled
+    properly i.e. the map TIP is incorrectly left marked as 'complete' and it
+    is never rescheduled elsewhere, leading to hung reduces.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2018. The source datanode of a data transfer waits for
+    a response from the target datanode before closing the data stream.
+    (Hairong Kuang via dhruba)
+                                
+    HADOOP-2023. Disable TestLocalDirAllocator on Windows.
+    (Hairong Kuang via nigel)
+
+    HADOOP-2016.  Ignore status-updates from FAILED/KILLED tasks at the 
+    TaskTracker. This fixes a race-condition which caused the tasks to wrongly 
+    remain in the RUNNING state even after being killed by the JobTracker and
+    thus handicap the cleanup of the task's output sub-directory. (acmurthy)
+
+    HADOOP-1771. Fix a NullPointerException in streaming caused by an 
+    IOException in MROutputThread. (lohit vijayarenu via nigel)
+
+    HADOOP-2028. Fix distcp so that the log dir does not need to be 
+    specified and the destination does not need to exist.
+    (Chris Douglas via nigel)
+
+    HADOOP-2044. The namenode protects all lease manipulations using a 
+    sortedLease lock.  (Dhruba Borthakur)
+
+    HADOOP-2051. The TaskCommit thread should not die for exceptions other
+    than the InterruptedException. This behavior is there for the other long
+    running threads in the JobTracker. (Arun C Murthy via ddas)
+
+    HADOOP-1973. The FileSystem object would be accessed on the JobTracker
+    through a RPC in the InterTrackerProtocol. The check for the object being
+    null was missing and hence NPE would be thrown sometimes. This issue fixes
+    that problem.  (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2033.  The SequenceFile.Writer.sync method was a no-op, which caused
+    very uneven splits for applications like distcp that count on them.
+    (omalley)
+
+    HADOOP-2070.  Added a flush method to pipes' DownwardProtocol and call
+    that before waiting for the application to finish to ensure all buffered
+    data is flushed. (Owen O'Malley via acmurthy)
+
+    HADOOP-2080.  Fixed calculation of the checksum file size when the values
+    are large. (omalley)
+
+    HADOOP-2048.  Change error handling in distcp so that each map copies
+    as much as possible before reporting the error. Also report progress on
+    every copy. (Chris Douglas via omalley)
+
+    HADOOP-2073.  Change size of VERSION file after writing contents to it.
+    (Konstantin Shvachko via dhruba)
+ 
+    HADOOP-2102.  Fix the deprecated ToolBase to pass its Configuration object
+    to the superceding ToolRunner to ensure it picks up the appropriate
+    configuration resources. (Dennis Kubes and Enis Soztutar via acmurthy) 
+ 
+    HADOOP-2103.  Fix minor javadoc bugs introduced by HADOOP-2046. (Nigel
+    Daley via acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-1908. Restructure data node code so that block sending and 
+    receiving are separated from data transfer header handling.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-1921. Save the configuration of completed/failed jobs and make them
+    available via the web-ui. (Amar Kamat via devaraj)
+
+    HADOOP-1266. Remove dependency of package org.apache.hadoop.net on 
+    org.apache.hadoop.dfs.  (Hairong Kuang via dhruba)
+
+    HADOOP-1779. Replace INodeDirectory.getINode() by a getExistingPathINodes()
+    to allow the retrieval of all existing INodes along a given path in a
+    single lookup. This facilitates removal of the 'parent' field in the
+    inode. (Christophe Taton via dhruba)
+
+    HADOOP-1756. Add toString() to some Writable-s. (ab)
+
+    HADOOP-1727.  New classes: MapWritable and SortedMapWritable.
+    (Jim Kellerman via ab)
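+
+    For illustration, a minimal sketch of the new class; the keys and values
+    are illustrative only:
+
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.MapWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.io.Writable;
+
+      public class MapWritableExample {
+        public static void main(String[] args) {
+          MapWritable counts = new MapWritable();   // a serializable java.util.Map
+          counts.put(new Text("errors"), new IntWritable(3));
+          Writable value = counts.get(new Text("errors"));
+          System.out.println(value);                // prints 3
+        }
+      }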
+
+    HADOOP-1651.  Improve progress reporting.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1595.  dfsshell can wait for a file to achieve its intended
+    replication target. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1693.  Remove un-needed log fields in DFS replication classes,
+    since the log may be accessed statically. (Konstantin Shvachko via cutting)
+
+    HADOOP-1231.  Add generics to Mapper and Reducer interfaces.
+    (tomwhite via cutting)
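+
+    For illustration, a minimal sketch of a mapper written against the
+    generic interface; the class name and logic are illustrative only:
+
+      import java.io.IOException;
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.LongWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapred.MapReduceBase;
+      import org.apache.hadoop.mapred.Mapper;
+      import org.apache.hadoop.mapred.OutputCollector;
+      import org.apache.hadoop.mapred.Reporter;
+
+      // The type parameters declare the input/output key and value classes
+      // up front, so mismatches are caught at compile time.
+      public class LineLengthMapper extends MapReduceBase
+          implements Mapper<LongWritable, Text, Text, IntWritable> {
+        public void map(LongWritable offset, Text line,
+                        OutputCollector<Text, IntWritable> output,
+                        Reporter reporter) throws IOException {
+          output.collect(line, new IntWritable(line.getLength()));
+        }
+      }
+
+    A job would pick such a class up via JobConf.setMapperClass.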
+
+    HADOOP-1436.  Improved command-line APIs, so that all tools need
+    not subclass ToolBase, and generic parameter parser is public.
+    (Enis Soztutar via cutting)
+
+    HADOOP-1703.  DFS-internal code cleanups, removing several uses of
+    the obsolete UTF8.  (Christophe Taton via cutting)
+
+    HADOOP-1731.  Add Hadoop's version to contrib jar file names.
+    (cutting)
+
+    HADOOP-1689.  Make shell scripts more portable.  All shell scripts
+    now explicitly depend on bash, but do not require that bash be
+    installed in a particular location, as long as it is on $PATH.
+    (cutting)
+
+    HADOOP-1744.  Remove many uses of the deprecated UTF8 class from
+    the HDFS namenode.  (Christophe Taton via cutting)
+
+    HADOOP-1654.  Add IOUtils class, containing generic io-related
+    utility methods.   (Enis Soztutar via cutting)
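+
+    For illustration, a minimal sketch of copying a file's bytes to stdout
+    with the new utilities; the argument handling is illustrative only:
+
+      import java.io.InputStream;
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+      import org.apache.hadoop.io.IOUtils;
+
+      public class CatExample {
+        public static void main(String[] args) throws Exception {
+          Configuration conf = new Configuration();
+          FileSystem fs = FileSystem.get(conf);
+          InputStream in = fs.open(new Path(args[0]));
+          try {
+            // copyBytes handles the read/write loop and buffer sizing.
+            IOUtils.copyBytes(in, System.out, conf, false);
+          } finally {
+            IOUtils.closeStream(in);   // quiet close, swallowing any IOException
+          }
+        }
+      }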
+
+    HADOOP-1158.  Change JobTracker to record map-output transmission
+    errors and use them to trigger speculative re-execution of tasks.
+    (Arun C Murthy via cutting)
+
+    HADOOP-1601.  Change GenericWritable to use ReflectionUtils for
+    instance creation, avoiding classloader issues, and to implement
+    Configurable.  (Enis Soztutar via cutting)
+
+    HADOOP-1750.  Log standard output and standard error when forking
+    task processes.  (omalley via cutting)
+
+    HADOOP-1803.  Generalize build.xml to make files in all
+    src/contrib/*/bin directories executable.  (stack via cutting)
+
+    HADOOP-1739.  Let OS always choose the tasktracker's umbilical
+    port.  Also switch default address for umbilical connections to
+    loopback.  (cutting)
+
+    HADOOP-1812. Let OS choose ports for IPC and RPC unit tests. (cutting)
+
+    HADOOP-1825.  Create $HADOOP_PID_DIR when it does not exist.
+    (Michael Bieniosek via cutting)
+
+    HADOOP-1425.  Replace uses of ToolBase with the Tool interface.
+    (Enis Soztutar via cutting)
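+
+    For illustration, a minimal sketch of the Tool/ToolRunner pattern this
+    refers to; the class name and body are illustrative only:
+
+      import org.apache.hadoop.conf.Configured;
+      import org.apache.hadoop.util.Tool;
+      import org.apache.hadoop.util.ToolRunner;
+
+      public class ExampleTool extends Configured implements Tool {
+        public int run(String[] args) throws Exception {
+          // getConf() is already populated by the generic -D/-conf options
+          // that ToolRunner parses before calling run().
+          System.out.println("remaining args: " + args.length);
+          return 0;
+        }
+
+        public static void main(String[] args) throws Exception {
+          System.exit(ToolRunner.run(new ExampleTool(), args));
+        }
+      }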
+
+    HADOOP-1569.  Reimplement DistCP to use the standard FileSystem/URI
+    code in Hadoop so that you can copy from and to all of the supported file 
+    systems.(Chris Douglas via omalley)
+
+    HADOOP-1018.  Improve documentation w.r.t. handling of lost heartbeats between
+    TaskTrackers and JobTracker. (acmurthy)
+
+    HADOOP-1718.  Add ant targets for measuring code coverage with clover.
+    (simonwillnauer via nigel)
+
+    HADOOP-1592.  Log error messages to the client console when tasks
+    fail.  (Amar Kamat via cutting)
+
+    HADOOP-1879.  Remove some unneeded casts.  (Nilay Vaish via cutting)
+
+    HADOOP-1878.  Add space between priority links on job details
+    page. (Thomas Friol via cutting)
+
+    HADOOP-120.  In ArrayWritable, prevent creation with null value
+    class, and improve documentation.  (Cameron Pope via cutting)
+
+    HADOOP-1926. Add a random text writer example/benchmark so that we can
+    benchmark compression codecs on random data. (acmurthy via omalley)
+
+    HADOOP-1906. Warn the user if they have an obsolete mapred-default.xml
+    file in their configuration directory. (acmurthy via omalley)
+
+    HADOOP-1971.  Warn when job does not specify a jar. (enis via cutting)
+
+    HADOOP-1942. Increase the concurrency of transaction logging to 
+    edits log. Reduce the number of syncs by double-buffering the changes
+    to the transaction log. (Dhruba Borthakur)
+
+    HADOOP-2046.  Improve mapred javadoc.  (Arun C. Murthy via cutting)
+
+    HADOOP-2105.  Improve overview.html to clarify supported platforms, 
+    software pre-requisites for hadoop, how to install them on various 
+    platforms, and a better general description of hadoop and its utility.
+    (Jim Kellerman via acmurthy) 
+
+
+Release 0.14.4 - 2007-11-26
+
+  BUG FIXES
+
+    HADOOP-2140.  Add missing Apache Licensing text at the front of several
+    C and C++ files.
+
+    HADOOP-2169.  Fix the DT_SONAME field of libhdfs.so to set it to the
+    correct value of 'libhdfs.so', currently it is set to the absolute path of
+    libhdfs.so. (acmurthy) 
+
+    HADOOP-2001.  Make the job priority updates and job kills synchronized on
+    the JobTracker. Deadlock was seen in the JobTracker because of the lack of
+    this synchronization.  (Arun C Murthy via ddas)
+
+
+Release 0.14.3 - 2007-10-19
+
+  BUG FIXES
+
+    HADOOP-2053. Fixed a dangling reference to a memory buffer in the map 
+    output sorter. (acmurthy via omalley)
+
+    HADOOP-2036. Fix a NullPointerException in JvmMetrics class. (nigel)
+
+    HADOOP-2043. Release 0.14.2 was compiled with Java 1.6 rather than
+    Java 1.5.  (cutting)
+
+
+Release 0.14.2 - 2007-10-09
+
+  BUG FIXES
+
+    HADOOP-1948. Removed spurious error message during block crc upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1862.  Fix reduces getting stuck trying to find map outputs.
+    (Arun C. Murthy via ddas)
+ 
+    HADOOP-1977. Fixed handling of ToolBase cli options in JobClient.
+    (enis via omalley)
+
+    HADOOP-1972.  Fix LzoCompressor to ensure the user has actually asked
+    to finish compression. (arun via omalley)
+
+    HADOOP-1970.  Fix deadlock in progress reporting in the task. (Vivek
+    Ratan via omalley)
+
+    HADOOP-1978.  Name-node removes edits.new after a successful startup.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1955.  The Namenode tries not to pick the same source Datanode for
+    a replication request if an earlier replication request for the same
+    block from that source Datanode had failed.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1961.  The -get option to dfs-shell works when a single filename
+    is specified.  (Raghu Angadi via dhruba)
+
+    HADOOP-1997.  TestCheckpoint closes the edits file after writing to it,
+    otherwise the rename of this file on Windows fails.
+    (Konstantin Shvachko via dhruba)
+
+Release 0.14.1 - 2007-09-04
+
+  BUG FIXES
+
+    HADOOP-1740.  Fix null pointer exception in sorting map outputs. (Devaraj
+    Das via omalley)
+
+    HADOOP-1790.  Fix tasktracker to work correctly on multi-homed
+    boxes.  (Torsten Curdt via cutting)
+
+    HADOOP-1798.  Fix jobtracker to correctly account for failed
+    tasks.  (omalley via cutting)
+
+
+Release 0.14.0 - 2007-08-17
+
+  INCOMPATIBLE CHANGES
+
+  1. HADOOP-1134.
+     CONFIG/API - dfs.block.size must now be a multiple of
+       io.bytes.per.checksum, otherwise new files cannot be written.
+     LAYOUT - DFS layout version changed from -6 to -7, which will require an
+       upgrade from previous versions.
+     PROTOCOL - Datanode RPC protocol version changed from 7 to 8.
+
+  2. HADOOP-1283
+     API - deprecated file locking API.
+
+  3. HADOOP-894
+     PROTOCOL - changed ClientProtocol to fetch parts of block locations.
+
+  4. HADOOP-1336
+     CONFIG - Enable speculative execution by default.
+
+  5. HADOOP-1197
+     API - deprecated method for Configuration.getObject, because
+       Configurations should only contain strings.
+
+  6. HADOOP-1343
+     API - deprecate Configuration.set(String,Object) so that only strings are
+       put in Configurations.
+
+  7. HADOOP-1207
+     CLI - Fix FsShell 'rm' command to continue when a non-existent file is
+       encountered.
+
+  8. HADOOP-1473
+     CLI/API - Job, TIP, and Task id formats have changed and are now unique
+       across job tracker restarts.
+
+  9. HADOOP-1400
+     API - JobClient constructor now takes a JobConf object instead of a
+       Configuration object.
+
+  NEW FEATURES and BUG FIXES
+
+  1. HADOOP-1197.  In Configuration, deprecate getObject() and add
+     getRaw(), which skips variable expansion. (omalley via cutting)
+
+  2. HADOOP-1343.  In Configuration, deprecate set(String,Object) and
+     implement Iterable. (omalley via cutting)
+
+  3. HADOOP-1344.  Add RunningJob#getJobName(). (Michael Bieniosek via cutting)
+
+  4. HADOOP-1342.  In aggregators, permit one to limit the number of
+     unique values per key.  (Runping Qi via cutting)
+
+  5. HADOOP-1340.  Set the replication factor of the MD5 file in the filecache
+     to be the same as the replication factor of the original file.
+     (Dhruba Borthakur via tomwhite.)
+
+  6. HADOOP-1355.  Fix null pointer dereference in 
+     TaskLogAppender.append(LoggingEvent).  (Arun C Murthy via tomwhite.)
+
+  7. HADOOP-1357.  Fix CopyFiles to correctly avoid removing "/".
+     (Arun C Murthy via cutting)
+
+  8. HADOOP-234.  Add pipes facility, which permits writing MapReduce
+     programs in C++.
+
+  9. HADOOP-1359.  Fix a potential NullPointerException in HDFS.
+     (Hairong Kuang via cutting)
+
+ 10. HADOOP-1364.  Fix inconsistent synchronization in SequenceFile.
+     (omalley via cutting)
+
+ 11. HADOOP-1379.  Add findbugs target to build.xml.
+     (Nigel Daley via cutting)
+
+ 12. HADOOP-1364.  Fix various inconsistent synchronization issues.
+     (Devaraj Das via cutting)
+
+ 13. HADOOP-1393.  Remove a potential unexpected negative number from
+     uses of random number generator. (omalley via cutting)
+
+ 14. HADOOP-1387.  A number of "performance" code-cleanups suggested
+     by findbugs.  (Arun C Murthy via cutting)
+
+ 15. HADOOP-1401.  Add contrib/hbase javadoc to tree.  (stack via cutting)
+
+ 16. HADOOP-894.  Change HDFS so that the client only retrieves a limited
+     number of block locations per request from the namenode.
+     (Konstantin Shvachko via cutting)
+
+ 17. HADOOP-1406.  Plug a leak in MapReduce's use of metrics.
+     (David Bowen via cutting)
+
+ 18. HADOOP-1394.  Implement "performance" code-cleanups in HDFS
+     suggested by findbugs.  (Raghu Angadi via cutting)
+
+ 19. HADOOP-1413.  Add example program that uses Knuth's dancing links
+     algorithm to solve pentomino problems.  (omalley via cutting)
+
+ 20. HADOOP-1226.  Change HDFS so that paths it returns are always
+     fully qualified.  (Dhruba Borthakur via cutting)
+
+ 21. HADOOP-800.  Improvements to HDFS web-based file browser.
+     (Enis Soztutar via cutting)
+
+ 22. HADOOP-1408.  Fix a compiler warning by adding a class to replace
+     a generic.  (omalley via cutting)
+
+ 23. HADOOP-1376.  Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark.  (Devaraj Das via cutting)
+
+ 24. HADOOP-1429.  Stop logging exceptions during normal IPC server
+     shutdown.  (stack via cutting)
+
+ 25. HADOOP-1461.  Fix the synchronization of the task tracker to
+     avoid lockups in job cleanup.  (Arun C Murthy via omalley)
+
+ 26. HADOOP-1446.  Update the TaskTracker metrics while the task is
+     running. (Devaraj via omalley)
+
+ 27. HADOOP-1414.  Fix a number of issues identified by FindBugs as
+     "Bad Practice".  (Dhruba Borthakur via cutting)
+
+ 28. HADOOP-1392.  Fix "correctness" bugs identified by FindBugs in
+     fs and dfs packages.  (Raghu Angadi via cutting)
+
+ 29. HADOOP-1412.  Fix "dodgy" bugs identified by FindBugs in fs and
+     io packages.  (Hairong Kuang via cutting)
+
+ 30. HADOOP-1261.  Remove redundant events from HDFS namenode's edit
+     log when a datanode restarts.  (Raghu Angadi via cutting)
+
+ 31. HADOOP-1336.  Re-enable speculative execution by
+     default. (omalley via cutting)
+
+ 32. HADOOP-1311.  Fix a bug in BytesWritable#set() where start offset
+     was ignored.  (Dhruba Borthakur via cutting)
+
+ 33. HADOOP-1450.  Move checksumming closer to user code, so that
+     checksums are created before data is stored in large buffers and
+     verified after data is read from large buffers, to better catch
+     memory errors.  (cutting)
+
+ 34. HADOOP-1447.  Add support in contrib/data_join for text inputs.
+     (Senthil Subramanian via cutting)
+
+ 35. HADOOP-1456.  Fix TestDecommission assertion failure by setting
+     the namenode to ignore the load on datanodes while allocating
+     replicas.  (Dhruba Borthakur via tomwhite)
+
+ 36. HADOOP-1396.  Fix FileNotFoundException on DFS block.
+     (Dhruba Borthakur via tomwhite)
+
+ 37. HADOOP-1467.  Remove redundant counters from WordCount example.
+     (Owen O'Malley via tomwhite)
+
+ 38. HADOOP-1139.  Log HDFS block transitions at INFO level, to better
+     enable diagnosis of problems.  (Dhruba Borthakur via cutting)
+
+ 39. HADOOP-1269.  Finer grained locking in HDFS namenode.
+     (Dhruba Borthakur via cutting)
+
+ 40. HADOOP-1438.  Improve HDFS documentation, correcting typos and
+     making images appear in PDF.  Also update copyright date for all
+     docs.  (Luke Nezda via cutting)
+
+ 41. HADOOP-1457.  Add counters for monitoring task assignments.
+     (Arun C Murthy via tomwhite)
+
+ 42. HADOOP-1472.  Fix so that timed-out tasks are counted as failures
+     rather than as killed.  (Arun C Murthy via cutting)
+
+ 43. HADOOP-1234.  Fix a race condition in file cache that caused
+     tasktracker to not be able to find cached files.
+     (Arun C Murthy via cutting)
+
+ 44. HADOOP-1482.  Fix secondary namenode to roll info port.
+     (Dhruba Borthakur via cutting)
+
+ 45. HADOOP-1300.  Improve removal of excess block replicas to be
+     rack-aware.  Attempts are now made to keep replicas on more
+     racks.  (Hairong Kuang via cutting)
+
+ 46. HADOOP-1417.  Disable a few FindBugs checks that generate a lot
+     of spurious warnings.  (Nigel Daley via cutting)
+
+ 47. HADOOP-1320.  Rewrite RandomWriter example to bypass reduce.
+     (Arun C Murthy via cutting)
+
+ 48. HADOOP-1449.  Add some examples to contrib/data_join.
+     (Senthil Subramanian via cutting)
+
+ 49. HADOOP-1459.  Fix so that, in HDFS, getFileCacheHints() returns
+     hostnames instead of IP addresses.  (Dhruba Borthakur via cutting)
+
+ 50. HADOOP-1493.  Permit specification of "java.library.path" system
+     property in "mapred.child.java.opts" configuration property.
+     (Enis Soztutar via cutting)
+
+ 51. HADOOP-1372.  Use LocalDirAllocator for HDFS temporary block
+     files, so that disk space, writability, etc. is considered.
+     (Dhruba Borthakur via cutting)
+
+ 52. HADOOP-1193.  Pool allocation of compression codecs.  This
+     eliminates a memory leak that could cause OutOfMemoryException,
+     and also substantially improves performance.
+     (Arun C Murthy via cutting)
+
+ 53. HADOOP-1492.  Fix a NullPointerException handling version
+     mismatch during datanode registration.
+     (Konstantin Shvachko via cutting)
+
+ 54. HADOOP-1442.  Fix handling of zero-length input splits.
+     (Senthil Subramanian via cutting)
+
+ 55. HADOOP-1444.  Fix HDFS block id generation to check pending
+     blocks for duplicates. (Dhruba Borthakur via cutting)
+
+ 56. HADOOP-1207.  Fix FsShell's 'rm' command to not stop when one of
+     the named files does not exist.  (Tsz Wo Sze via cutting)
+
+ 57. HADOOP-1475.  Clear tasktracker's file cache before it
+     re-initializes, to avoid confusion.  (omalley via cutting)
+
+ 58. HADOOP-1505.  Remove spurious stacktrace in ZlibFactory
+     introduced in HADOOP-1093.  (Michael Stack via tomwhite)
+
+ 59. HADOOP-1484.  Permit one to kill jobs from the web ui.  Note that
+     this is disabled by default.  One must set
+     "webinterface.private.actions" to enable this.
+     (Enis Soztutar via cutting)
+
+ 60. HADOOP-1003.  Remove flushing of namenode edit log from primary
+     namenode lock, increasing namenode throughput.
+     (Dhruba Borthakur via cutting)
+
+ 61. HADOOP-1023.  Add links to searchable mail archives.
+     (tomwhite via cutting)
+
+ 62. HADOOP-1504.  Fix terminate-hadoop-cluster script in contrib/ec2
+     to only terminate Hadoop instances, and not other instances
+     started by the same user.  (tomwhite via cutting)
+
+ 63. HADOOP-1462.  Improve task progress reporting.  Progress reports
+     are no longer blocking since i/o is performed in a separate
+     thread.  Reporting during sorting and more is also more
+     consistent.  (Vivek Ratan via cutting)
+
+ 64. [ intentionally blank ]
+
+ 65. HADOOP-1453.  Remove some unneeded calls to FileSystem#exists()
+     when opening files, reducing the namenode load somewhat.
+     (Raghu Angadi via cutting)
+
+ 66. HADOOP-1489.  Fix text input truncation bug due to mark/reset.
+     Add a unit test. (Bwolen Yang via cutting)
+
+ 67. HADOOP-1455.  Permit specification of arbitrary job options on
+     pipes command line.  (Devaraj Das via cutting)
+
+ 68. HADOOP-1501.  Better randomize sending of block reports to
+     namenode, to reduce load spikes.  (Dhruba Borthakur via cutting)
+
+ 69. HADOOP-1147.  Remove @author tags from Java source files.
+
+ 70. HADOOP-1283.  Convert most uses of UTF8 in the namenode to be
+     String.  (Konstantin Shvachko via cutting)
+
+ 71. HADOOP-1511.  Speedup hbase unit tests.  (stack via cutting)
+
+ 72. HADOOP-1517.  Remove some synchronization in namenode to permit
+     finer grained locking previously added.  (Konstantin Shvachko via cutting)
+
+ 73. HADOOP-1512.  Fix failing TestTextInputFormat on Windows.
+     (Senthil Subramanian via nigel)
+
+ 74. HADOOP-1518.  Add a session id to job metrics, for use by HOD.
+     (David Bowen via cutting)
+
+ 75. HADOOP-1292.  Change 'bin/hadoop fs -get' to first copy files to
+     a temporary name, then rename them to their final name, so that
+     failures don't leave partial files.  (Tsz Wo Sze via cutting)
+
+ 76. HADOOP-1377.  Add support for modification time to FileSystem and
+     implement in HDFS and local implementations.  Also, alter access
+     to file properties to be through a new FileStatus interface.
+     (Dhruba Borthakur via cutting)
+
+ 77. HADOOP-1515.  Add MultiFileInputFormat, which can pack multiple,
+     typically small, input files into each split.  (Enis Soztutar via cutting)
+
+ 78. HADOOP-1514.  Make reducers report progress while waiting for map
+     outputs, so they're not killed.  (Vivek Ratan via cutting)
+
+ 79. HADOOP-1508.  Add an Ant task for FsShell operations.  Also add
+     new FsShell commands "touchz", "test" and "stat".
+     (Chris Douglas via cutting)
+
+ 80. HADOOP-1028.  Add log messages for server startup and shutdown.
+     (Tsz Wo Sze via cutting)
+
+ 81. HADOOP-1485.  Add metrics for monitoring shuffle.
+     (Devaraj Das via cutting)
+
+ 82. HADOOP-1536.  Remove file locks from libhdfs tests.
+     (Dhruba Borthakur via nigel)
+
+ 83. HADOOP-1520.  Add appropriate synchronization to FSEditsLog.
+     (Dhruba Borthakur via nigel)
+
+ 84. HADOOP-1513.  Fix a race condition in directory creation. 
+     (Devaraj via omalley)
+
+ 85. HADOOP-1546.  Remove spurious column from HDFS web UI.
+     (Dhruba Borthakur via cutting)
+
+ 86. HADOOP-1556.  Make LocalJobRunner delete working files at end of
+     job run.  (Devaraj Das via tomwhite)
+
+ 87. HADOOP-1571.  Add contrib lib directories to root build.xml
+     javadoc classpath.  (Michael Stack via tomwhite)
+
+ 88. HADOOP-1554.  Log killed tasks to the job history and display them on the
+     web/ui. (Devaraj Das via omalley)
+
+ 89. HADOOP-1533.  Add persistent error logging for distcp. The logs are stored
+     into a specified hdfs directory. (Senthil Subramanian via omalley)
+
+ 90. HADOOP-1286.  Add support to HDFS for distributed upgrades, which
+     permits coordinated upgrade of datanode data.
+     (Konstantin Shvachko via cutting)
+
+ 91. HADOOP-1580.  Improve contrib/streaming so that subprocess exit
+     status is displayed for errors.  (John Heidemann via cutting)
+
+ 92. HADOOP-1448.  In HDFS, randomize lists of non-local block
+     locations returned to client, so that load is better balanced.
+     (Hairong Kuang via cutting)
+
+ 93. HADOOP-1578.  Fix datanode to send its storage id to namenode
+     during registration.  (Konstantin Shvachko via cutting)
+
+ 94. HADOOP-1584.  Fix a bug in GenericWritable which limited it to
+     128 types instead of 256.  (Espen Amble Kolstad via cutting)
+
+ 95. HADOOP-1473.  Make job ids unique across jobtracker restarts.
+     (omalley via cutting)
+
+ 96. HADOOP-1582.  Fix hdfslib to return 0 instead of -1 at
+     end-of-file, per C conventions.  (Christian Kunz via cutting)
+
+ 97. HADOOP-911.  Fix a multithreading bug in libhdfs.
+     (Christian Kunz)
+
+ 98. HADOOP-1486.  Fix so that fatal exceptions in namenode cause it
+     to exit.  (Dhruba Borthakur via cutting)
+
+ 99. HADOOP-1470.  Factor checksum generation and validation out of
+     ChecksumFileSystem so that it can be reused by FileSystem's with
+     built-in checksumming.  (Hairong Kuang via cutting)
+
+100. HADOOP-1590.  Use relative urls in jobtracker jsp pages, so that
+     webapp can be used in non-root contexts.  (Thomas Friol via cutting)
+
+101. HADOOP-1596.  Fix the parsing of taskids by streaming and improve the
+     error reporting. (omalley)
+
+102. HADOOP-1535.  Fix the user-controlled grouping to the reduce function.
+     (Vivek Ratan via omalley)
+
+103. HADOOP-1585.  Modify GenericWritable to declare the classes as subtypes
+     of Writable (Espen Amble Kolstad via omalley)
+
+104. HADOOP-1576.  Fix errors in count of completed tasks when
+     speculative execution is enabled.  (Arun C Murthy via cutting)
+
+105. HADOOP-1598.  Fix license headers: adding missing; updating old.
+     (Enis Soztutar via cutting)
+
+106. HADOOP-1547.  Provide examples for aggregate library.
+     (Runping Qi via tomwhite)
+
+107. HADOOP-1570.  Permit jobs to enable and disable the use of
+     hadoop's native library.  (Arun C Murthy via cutting)
+
+108. HADOOP-1433.  Add job priority.  (Johan Oskarsson via tomwhite)
+
+109. HADOOP-1597.  Add status reports and post-upgrade options to HDFS
+     distributed upgrade.  (Konstantin Shvachko via cutting)
+
+110. HADOOP-1524.  Permit user task logs to appear as they're
+     created.  (Michael Bieniosek via cutting)
+
+111. HADOOP-1599.  Fix distcp bug on Windows.  (Senthil Subramanian via cutting)
+
+112. HADOOP-1562.  Add JVM metrics, including GC and logging stats.
+     (David Bowen via cutting)
+
+113. HADOOP-1613.  Fix "DFS Health" page to display correct time of
+     last contact.  (Dhruba Borthakur via cutting)
+
+114. HADOOP-1134.  Add optimized checksum support to HDFS.  Checksums
+     are now stored with each block, rather than as parallel files.
+     This reduces the namenode's memory requirements and increases
+     data integrity.  (Raghu Angadi via cutting)
+
+115. HADOOP-1400.  Make JobClient retry requests, so that clients can
+     survive jobtracker problems.  (omalley via cutting)
+
+116. HADOOP-1564.  Add unit tests for HDFS block-level checksums.
+     (Dhruba Borthakur via cutting)
+
+117. HADOOP-1620.  Reduce the number of abstract FileSystem methods,
+     simplifying implementations.  (cutting)
+
+118. HADOOP-1625.  Fix a "could not move files" exception in datanode.
+     (Raghu Angadi via cutting)
+
+119. HADOOP-1624.  Fix an infinite loop in datanode. (Raghu Angadi via cutting)
+
+120. HADOOP-1084.  Switch mapred file cache to use file modification
+     time instead of checksum to detect file changes, as checksums are
+     no longer easily accessed.  (Arun C Murthy via cutting)
+
+130. HADOOP-1623.  Fix an infinite loop when copying directories.
+     (Dhruba Borthakur via cutting)
+
+131. HADOOP-1603.  Fix a bug in namenode initialization where
+     default replication is sometimes reset to one on restart.
+     (Raghu Angadi via cutting)
+
+132. HADOOP-1635.  Remove hardcoded keypair name and fix launch-hadoop-cluster
+     to support later versions of ec2-api-tools.  (Stu Hood via tomwhite)
+
+133. HADOOP-1638.  Fix contrib EC2 scripts to support NAT addressing.
+     (Stu Hood via tomwhite) 
+
+134. HADOOP-1632.  Fix an IllegalArgumentException in fsck.
+     (Hairong Kuang via cutting)
+
+135. HADOOP-1619.  Fix FSInputChecker to not attempt to read past EOF.
+     (Hairong Kuang via cutting)
+
+136. HADOOP-1640.  Fix TestDecommission on Windows.
+     (Dhruba Borthakur via cutting)
+
+137. HADOOP-1587.  Fix TestSymLink to get required system properties.
+     (Devaraj Das via omalley)
+
+138. HADOOP-1628.  Add block CRC protocol unit tests. (Raghu Angadi via omalley)
+
+139. HADOOP-1653.  FSDirectory code-cleanups. FSDirectory.INode
+     becomes a static class.  (Christophe Taton via dhruba)
+
+140. HADOOP-1066.  Restructure documentation to make more user
+     friendly.  (Connie Kleinjans and Jeff Hammerbacher via cutting)
+
+141. HADOOP-1551.  libhdfs supports setting replication factor and
+     retrieving modification time of files.  (Sameer Paranjpye via dhruba)
+
+141. HADOOP-1647.  FileSystem.getFileStatus returns valid values for "/".
+     (Dhruba Borthakur via dhruba)
+
+142. HADOOP-1657.  Fix NNBench to ensure that the block size is a
+     multiple of bytes.per.checksum. (Raghu Angadi via dhruba)
+
+143. HADOOP-1553.  Replace user task output and log capture code to use shell
+     redirection instead of copier threads in the TaskTracker. Capping the
+     size of the output is now done via tail in memory and thus should not be 
+     large. The output of the tasklog servlet is not forced into UTF8 and is
+     not buffered entirely in memory. (omalley)
+     Configuration changes to hadoop-default.xml:
+       remove mapred.userlog.num.splits
+       remove mapred.userlog.purge.splits
+       change default mapred.userlog.limit.kb to 0 (no limit)
+       change default mapred.userlog.retain.hours to 24
+     Configuration changes to log4j.properties:
+       remove log4j.appender.TLA.noKeepSplits
+       remove log4j.appender.TLA.purgeLogSplits
+       remove log4j.appender.TLA.logsRetainHours
+     URL changes:
+       http://<tasktracker>/tasklog.jsp -> http://<tasktracker>/tasklog with
+         parameters limited to start and end, which may be positive (from
+         start) or negative (from end).
+     Environment:
+       require bash (v2 or later) and tail
+
+144. HADOOP-1659.  Fix a job id/job name mixup. (Arun C. Murthy via omalley)
+
+145. HADOOP-1665.  With HDFS Trash enabled, if the same file is created
+     and deleted more than once, the succeeding deletions create Trash item
+     names suffixed with an integer.  (Dhruba Borthakur via dhruba)
+
+146. HADOOP-1666.  FsShell object can be used for multiple fs commands.
+     (Dhruba Borthakur via dhruba)
+
+147. HADOOP-1654.  Remove performance regression introduced by Block CRC.
+     (Raghu Angadi via dhruba)
+
+148. HADOOP-1680.  Improvements to Block CRC upgrade messages.
+     (Raghu Angadi via dhruba)
+
+149. HADOOP-71.  Allow Text and SequenceFile Map/Reduce inputs from non-default 
+     filesystems. (omalley)
+
+150. HADOOP-1568.  Expose HDFS as xml/http filesystem to provide cross-version
+     compatibility. (Chris Douglas via omalley)
+
+151. HADOOP-1668.  Added an INCOMPATIBILITY section to CHANGES.txt. (nigel)
+
+152. HADOOP-1629.  Added an upgrade test for HADOOP-1134.
+     (Raghu Angadi via nigel)
+
+153. HADOOP-1698.  Fix performance problems on map output sorting for jobs
+     with large numbers of reduces. (Devaraj Das via omalley)
+
+154. HADOOP-1716.  Fix a Pipes wordcount example to remove the 'file:'
+     schema from its output path.  (omalley via cutting)
+
+155. HADOOP-1714.  Fix TestDFSUpgradeFromImage to work on Windows.
+     (Raghu Angadi via nigel)
+
+156. HADOOP-1663.  Return a non-zero exit code if streaming fails. (Lohit Renu
+     via omalley)
+
+157. HADOOP-1712.  Fix an unhandled exception on datanode during block
+     CRC upgrade. (Raghu Angadi via cutting)
+
+158. HADOOP-1717.  Fix TestDFSUpgradeFromImage to work on Solaris.
+     (nigel via cutting)
+
+159. HADOOP-1437.  Add Eclipse plugin in contrib.
+     (Eugene Hung and Christophe Taton via cutting)
+
+
+Release 0.13.0 - 2007-06-08
+
+ 1. HADOOP-1047.  Fix TestReplication to succeed more reliably.
+    (Hairong Kuang via cutting)
+
+ 2. HADOOP-1063.  Fix a race condition in MiniDFSCluster test code.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-1101.  In web ui, split shuffle statistics from reduce
+    statistics, and add some task averages.  (Devaraj Das via cutting)
+
+ 4. HADOOP-1071.  Improve handling of protocol version mismatch in
+    JobTracker.  (Tahir Hashmi via cutting)
+
+ 5. HADOOP-1116.  Increase heap size used for contrib unit tests.
+    (Philippe Gassmann via cutting)
+
+ 6. HADOOP-1120.  Add contrib/data_join, tools to simplify joining
+    data from multiple sources using MapReduce.  (Runping Qi via cutting)
+
+ 7. HADOOP-1064.  Reduce log level of some DFSClient messages.
+    (Dhruba Borthakur via cutting)
+
+ 8. HADOOP-1137.  Fix StatusHttpServer to work correctly when
+    resources are in a jar file.  (Benjamin Reed via cutting)
+
+ 9. HADOOP-1094.  Optimize generated Writable implementations for
+    records to not allocate a new BinaryOutputArchive or
+    BinaryInputArchive per call.  (Milind Bhandarkar via cutting)
+
+10. HADOOP-1068.  Improve error message for clusters with 0 datanodes.
+    (Dhruba Borthakur via tomwhite)
+
+11. HADOOP-1122.  Fix divide-by-zero exception in FSNamesystem
+    chooseTarget method.  (Dhruba Borthakur via tomwhite)
+
+12. HADOOP-1131.  Add a closeAll() static method to FileSystem.
+    (Philippe Gassmann via tomwhite)
+
+13. HADOOP-1085.  Improve port selection in HDFS and MapReduce test
+    code.  Ports are now selected by the OS during testing rather than
+    by probing for free ports, improving test reliability.
+    (Arun C Murthy via cutting)
+
+14. HADOOP-1153.  Fix HDFS daemons to correctly stop their threads.
+    (Konstantin Shvachko via cutting)
+
+15. HADOOP-1146.  Add a counter for reduce input keys and rename the
+    "reduce input records" counter to be "reduce input groups".
+    (David Bowen via cutting)
+
+16. HADOOP-1165.  In records, replace identical generated toString
+    methods with a method on the base class.  (Milind Bhandarkar via cutting)
+
+17. HADOOP-1164.  Fix TestReplicationPolicy to specify port zero, so
+    that a free port is automatically selected.  (omalley via cutting)
+
+18. HADOOP-1166.  Add a NullOutputFormat and use it in the
+    RandomWriter example.  (omalley via cutting)
+
+19. HADOOP-1169.  Fix a cut/paste error in CopyFiles utility so that
+    S3-based source files are correctly copied.  (Michael Stack via cutting)
+
+20. HADOOP-1167.  Remove extra synchronization in InMemoryFileSystem.
+    (omalley via cutting)
+
+21. HADOOP-1110.  Fix an off-by-one error counting map inputs.
+    (David Bowen via cutting)
+
+22. HADOOP-1178.  Fix a NullPointerException during namenode startup.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-1011.  Fix a ConcurrentModificationException when viewing
+    job history.  (Tahir Hashmi via cutting)
+
+24. HADOOP-672.  Improve help for fs shell commands.
+    (Dhruba Borthakur via cutting)
+
+25. HADOOP-1170.  Improve datanode performance by removing device
+    checks from common operations.  (Igor Bolotin via cutting)
+
+26. HADOOP-1090.  Fix SortValidator's detection of whether the input 
+    file belongs to the sort-input or sort-output directory.
+    (Arun C Murthy via tomwhite)
+
+27. HADOOP-1081.  Fix bin/hadoop on Darwin.  (Michael Bieniosek via cutting)
+
+28. HADOOP-1045.  Add contrib/hbase, a BigTable-like online database.
+    (Jim Kellerman via cutting)
+
+29. HADOOP-1156.  Fix a NullPointerException in MiniDFSCluster.
+    (Hairong Kuang via cutting)
+
+30. HADOOP-702.  Add tools to help automate HDFS upgrades.
+    (Konstantin Shvachko via cutting)
+
+31. HADOOP-1163.  Fix ganglia metrics to aggregate metrics from different
+    hosts properly.  (Michael Bieniosek via tomwhite)
+
+32. HADOOP-1194.  Make compression style record level for map output
+    compression.  (Arun C Murthy via tomwhite)
+
+33. HADOOP-1187.  Improve DFS Scalability: avoid scanning entire list of
+    datanodes in getAdditionalBlocks.  (Dhruba Borthakur via tomwhite)
+
+34. HADOOP-1133.  Add tool to analyze and debug namenode on a production
+    cluster.  (Dhruba Borthakur via tomwhite)
+
+35. HADOOP-1151.  Remove spurious printing to stderr in streaming 
+    PipeMapRed.  (Koji Noguchi via tomwhite)
+
+36. HADOOP-988.  Change namenode to use a single map of blocks to metadata.
+    (Raghu Angadi via tomwhite)
+
+37. HADOOP-1203.  Change UpgradeUtilities used by DFS tests to use
+    MiniDFSCluster to start and stop NameNode/DataNodes.
+    (Nigel Daley via tomwhite)
+
+38. HADOOP-1217.  Add test.timeout property to build.xml, so that
+    long-running unit tests may be automatically terminated.
+    (Nigel Daley via cutting)
+
+39. HADOOP-1149.  Improve DFS Scalability: make 
+    processOverReplicatedBlock() a no-op if blocks are not 
+    over-replicated.  (Raghu Angadi via tomwhite)
+
+40. HADOOP-1149.  Improve DFS Scalability: optimize getDistance(), 
+    contains(), and isOnSameRack() in NetworkTopology.  
+    (Hairong Kuang via tomwhite)
+
+41. HADOOP-1218.  Make synchronization on TaskTracker's RunningJob 
+    object consistent.  (Devaraj Das via tomwhite)
+
+42. HADOOP-1219.  Ignore progress report once a task has reported as 
+    'done'.  (Devaraj Das via tomwhite)
+
+43. HADOOP-1114.  Permit user to specify additional CLASSPATH elements
+    with a HADOOP_CLASSPATH environment variable. (cutting)
+
+44. HADOOP-1198.  Remove ipc.client.timeout parameter override from 
+    unit test configuration.  Using the default is more robust and
+    has almost the same run time.  (Arun C Murthy via tomwhite)
+
+45. HADOOP-1211.  Remove deprecated constructor and unused static 
+    members in DataNode class.  (Konstantin Shvachko via tomwhite)
+
+46. HADOOP-1136.  Fix ArrayIndexOutOfBoundsException in 
+    FSNamesystem$UnderReplicatedBlocks add() method.  
+    (Hairong Kuang via tomwhite)
+
+47. HADOOP-978.  Add the client name and the address of the node that
+    previously started to create the file to the description of 
+    AlreadyBeingCreatedException.  (Konstantin Shvachko via tomwhite)
+
+48. HADOOP-1001.  Check the type of keys and values generated by the 
+    mapper against the types specified in JobConf.  
+    (Tahir Hashmi via tomwhite)
+
+49. HADOOP-971.  Improve DFS Scalability: Improve name node performance
+    by adding a hostname to datanodes map.  (Hairong Kuang via tomwhite)
+
+50. HADOOP-1189.  Fix 'No space left on device' exceptions on datanodes.
+    (Raghu Angadi via tomwhite)
+
+51. HADOOP-819.  Change LineRecordWriter to not insert a tab between
+    key and value when either is null, and to print nothing when both
+    are null.  (Runping Qi via cutting)
+
+52. HADOOP-1204.  Rename InputFormatBase to be FileInputFormat, and
+    deprecate InputFormatBase.  Also make LineRecordReader easier to
+    extend.  (Runping Qi via cutting)
+
+53. HADOOP-1213.  Improve logging of errors by IPC server, to
+    consistently include the service name and the call.  (cutting)
+
+54. HADOOP-1238.  Fix metrics reporting by TaskTracker to correctly
+    track maps_running and reduces_running.
+    (Michael Bieniosek via cutting)
+
+55. HADOOP-1093.  Fix a race condition in HDFS where blocks were
+    sometimes erased before they were reported written.
+    (Dhruba Borthakur via cutting)
+
+56. HADOOP-1239.  Add a package name to some testjar test classes.
+    (Jim Kellerman via cutting)
+
+57. HADOOP-1241.  Fix NullPointerException in processReport when 
+    namenode is restarted.  (Dhruba Borthakur via tomwhite)
+
+58. HADOOP-1244.  Fix stop-dfs.sh to no longer incorrectly specify 
+    slaves file for stopping datanode.  
+    (Michael Bieniosek via tomwhite)
+
+59. HADOOP-1253.  Fix ConcurrentModificationException and 
+    NullPointerException in JobControl.  
+    (Johan Oskarson via tomwhite)
+
+60. HADOOP-1256.  Fix NameNode so that multiple DataNodeDescriptors
+    can no longer be created on startup.  (Hairong Kuang via cutting)
+
+61. HADOOP-1214.  Replace streaming classes with new counterparts 
+    from Hadoop core.  (Runping Qi via tomwhite)
+
+62. HADOOP-1250.  Move a chmod utility from streaming to FileUtil.
+    (omalley via cutting)
+
+63. HADOOP-1258.  Fix TestCheckpoint test case to wait for 
+    MiniDFSCluster to be active.  (Nigel Daley via tomwhite)
+
+64. HADOOP-1148.  Re-indent all Java source code to consistently use
+    two spaces per indent level.  (cutting)
+
+65. HADOOP-1251.  Add a method to Reporter to get the map InputSplit.
+    (omalley via cutting)
+
+66. HADOOP-1224.  Fix "Browse the filesystem" link to no longer point 
+    to dead datanodes.  (Enis Soztutar via tomwhite)
+
+67. HADOOP-1154.  Fail a streaming task if the threads reading from or 
+    writing to the streaming process fail.  (Koji Noguchi via tomwhite)
+
+68. HADOOP-968.  Move shuffle and sort to run in reduce's child JVM,
+    rather than in TaskTracker.  (Devaraj Das via cutting)
+
+69. HADOOP-1111.  Add support for client notification of job
+    completion. If the job configuration has a job.end.notification.url
+    property it will make a HTTP GET request to the specified URL.
+    The number of retries and the interval between retries is also
+    configurable. (Alejandro Abdelnur via tomwhite)
+
+70. HADOOP-1275.  Fix misspelled job notification property in
+    hadoop-default.xml.  (Alejandro Abdelnur via tomwhite)
+
+71. HADOOP-1152.  Fix race condition in MapOutputCopier.copyOutput file
+    rename causing possible reduce task hang.
+    (Tahir Hashmi via tomwhite)
+
+72. HADOOP-1050.  Distinguish between failed and killed tasks so as to 
+    not count a lost tasktracker against the job.  
+    (Arun C Murthy via tomwhite)
+
+73. HADOOP-1271.  Fix StreamBaseRecordReader to be able to log record 
+    data that's not UTF-8.  (Arun C Murthy via tomwhite)
+
+74. HADOOP-1190.  Fix unchecked warnings in main Hadoop code.  
+    (tomwhite)
+
+75. HADOOP-1127.  Fix AlreadyBeingCreatedException in namenode for 
+    jobs run with speculative execution.
+    (Arun C Murthy via tomwhite)
+
+76. HADOOP-1282.  Omnibus HBase patch.  Improved tests & configuration.
+    (Jim Kellerman via cutting)
+
+77. HADOOP-1262.  Make dfs client try to read from a different replica 
+    of the checksum file when a checksum error is detected.  
+    (Hairong Kuang via tomwhite)
+
+78. HADOOP-1279.  Fix JobTracker to maintain list of recently
+    completed jobs by order of completion, not submission.
+    (Arun C Murthy via cutting)
+
+79. HADOOP-1284.  In contrib/streaming, permit flexible specification
+    of field delimiter and fields for partitioning and sorting.
+    (Runping Qi via cutting)
+
+80. HADOOP-1176.  Fix a bug where reduce would hang when a map had
+    more than 2GB of output for it.  (Arun C Murthy via cutting)
+
+81. HADOOP-1293.  Fix contrib/streaming to print more than the first
+    twenty lines of standard error.  (Koji Noguchi via cutting)
+
+82. HADOOP-1297.  Fix datanode so that requests to remove blocks that
+    do not exist no longer cause block reports to be re-sent every
+    second.  (Dhruba Borthakur via cutting)
+
+83. HADOOP-1216.  Change MapReduce so that, when numReduceTasks is
+    zero, map outputs are written directly as final output, skipping
+    shuffle, sort and reduce.  Use this to implement reduce=NONE
+    option in contrib/streaming.  (Runping Qi via cutting)
+
+84. HADOOP-1294.  Fix unchecked warnings in main Hadoop code under 
+    Java 6.  (tomwhite)
+
+85. HADOOP-1299.  Fix so that RPC will restart after RPC.stopClient()
+    has been called.  (Michael Stack via cutting)
+
+86. HADOOP-1278.  Improve blacklisting of TaskTrackers by JobTracker,
+    to reduce false positives.  (Arun C Murthy via cutting)
+
+87. HADOOP-1290.  Move contrib/abacus into mapred/lib/aggregate.
+    (Runping Qi via cutting)
+
+88. HADOOP-1272.  Extract inner classes from FSNamesystem into separate 
+    classes.  (Dhruba Borthakur via tomwhite)
+
+89. HADOOP-1247.  Add support to contrib/streaming for aggregate
+    package, formerly called Abacus.  (Runping Qi via cutting)
+
+90. HADOOP-1061.  Fix bug in listing files in the S3 filesystem.
+    NOTE: this change is not backwards compatible!  You should use the 
+    MigrationTool supplied to migrate existing S3 filesystem data to 
+    the new format.  Please back up your data first before upgrading
+    (using 'hadoop distcp' for example).  (tomwhite)
+
+91. HADOOP-1304.  Make configurable the maximum number of task
+    attempts before a job fails.  (Devaraj Das via cutting)
+
+92. HADOOP-1308.  Use generics to restrict types when classes are
+    passed as parameters to JobConf methods. (Michael Bieniosek via cutting)
+
+93. HADOOP-1312.  Fix a ConcurrentModificationException in NameNode
+    that killed the heartbeat monitoring thread.
+    (Dhruba Borthakur via cutting)
+
+94. HADOOP-1315.  Clean up contrib/streaming, switching it to use core
+    classes more and removing unused code.  (Runping Qi via cutting)
+
+95. HADOOP-485.  Allow a different comparator for grouping keys in
+    calls to reduce.  (Tahir Hashmi via cutting)
+
+96. HADOOP-1322.  Fix TaskTracker blacklisting to work correctly in
+    one- and two-node clusters.  (Arun C Murthy via cutting)
+
+97. HADOOP-1144.  Permit one to specify a maximum percentage of tasks
+    that can fail before a job is aborted.  The default is zero.
+    (Arun C Murthy via cutting)
+
+98. HADOOP-1184.  Fix HDFS decommissioning to complete when the only
+    copy of a block is on a decommissioned node. (Dhruba Borthakur via cutting)
+
+99. HADOOP-1263.  Change DFSClient to retry certain namenode calls
+    with a random, exponentially increasing backoff time, to avoid
+    overloading the namenode on, e.g., job start.  (Hairong Kuang via cutting)
+
+100. HADOOP-1325.  First complete, functioning version of HBase.
+    (Jim Kellerman via cutting)
+
+101. HADOOP-1276.  Make tasktracker expiry interval configurable.
+    (Arun C Murthy via cutting)
+
+102. HADOOP-1326.  Change JobClient#RunJob() to return the job.
+    (omalley via cutting)
+
+103. HADOOP-1270.  Randomize the fetch of map outputs, speeding the
+     shuffle.  (Arun C Murthy via cutting)
+
+104. HADOOP-1200.  Restore disk checking lost in HADOOP-1170.
+     (Hairong Kuang via cutting)
+
+105. HADOOP-1252.  Changed MapReduce's allocation of local files to
+     use round-robin among available devices, rather than a hashcode.
+     More care is also taken to not allocate files on full or offline
+     drives.  (Devaraj Das via cutting)
+
+106. HADOOP-1324.  Change so that an FSError kills only the task that
+     generates it rather than the entire task tracker.
+     (Arun C Murthy via cutting)
+
+107. HADOOP-1310.  Fix unchecked warnings in aggregate code.  (tomwhite)
+
+108. HADOOP-1255.  Fix a bug where the namenode falls into an infinite
+     loop trying to remove a dead node.  (Hairong Kuang via cutting)
+
+109. HADOOP-1160.  Fix DistributedFileSystem.close() to close the
+     underlying FileSystem, correctly aborting files being written.
+     (Hairong Kuang via cutting)
+
+110. HADOOP-1341.  Fix intermittent failures in HBase unit tests
+     caused by deadlock.  (Jim Kellerman via cutting)
+
+111. HADOOP-1350.  Fix shuffle performance problem caused by forcing
+     chunked encoding of map outputs.  (Devaraj Das via cutting)
+
+112. HADOOP-1345.  Fix HDFS to correctly retry another replica when a
+     checksum error is encountered.  (Hairong Kuang via cutting)
+
+113. HADOOP-1205.  Improve synchronization around HDFS block map.
+     (Hairong Kuang via cutting)
+
+114. HADOOP-1353.  Fix a potential NullPointerException in namenode.
+     (Dhruba Borthakur via cutting)
+
+115. HADOOP-1354.  Fix a potential NullPointerException in FsShell.
+     (Hairong Kuang via cutting)
+
+116. HADOOP-1358.  Fix a potential bug when DFSClient calls skipBytes.
+     (Hairong Kuang via cutting)
+
+117. HADOOP-1356.  Fix a bug in ValueHistogram.  (Runping Qi via cutting)
+
+118. HADOOP-1363.  Fix locking bug in JobClient#waitForCompletion().
+     (omalley via cutting)
+
+119. HADOOP-1368.  Fix inconsistent synchronization in JobInProgress.
+     (omalley via cutting)
+
+120. HADOOP-1369.  Fix inconsistent synchronization in TaskTracker.
+     (omalley via cutting)
+
+121. HADOOP-1361.  Fix various calls to skipBytes() to check return
+     value. (Hairong Kuang via cutting)
+
+122. HADOOP-1388.  Fix a potential NullPointerException in web ui.
+     (Devaraj Das via cutting)
+
+123. HADOOP-1385.  Fix MD5Hash#hashCode() to generally hash to more
+     than 256 values.  (omalley via cutting)
+
+124. HADOOP-1386.  Fix Path to not permit the empty string as a
+     path, as this has led to accidental file deletion.  Instead
+     force applications to use "." to name the default directory.
+     (Hairong Kuang via cutting)
+
+125. HADOOP-1407.  Fix integer division bug in JobInProgress which
+     meant failed tasks didn't cause the job to fail.
+     (Arun C Murthy via tomwhite)
+
+126. HADOOP-1427.  Fix a typo that caused GzipCodec to incorrectly use
+     a very small input buffer.  (Espen Amble Kolstad via cutting)
+
+127. HADOOP-1435.  Fix globbing code to no longer use the empty string
+     to indicate the default directory, per HADOOP-1386.
+     (Hairong Kuang via cutting)
+
+128. HADOOP-1411.  Make task retry framework handle 
+     AlreadyBeingCreatedException when wrapped as a RemoteException.
+     (Hairong Kuang via tomwhite)
+
+129. HADOOP-1242.  Improve handling of DFS upgrades.
+     (Konstantin Shvachko via cutting)
+
+130. HADOOP-1332.  Fix so that TaskTracker exits reliably during unit
+     tests on Windows.  (omalley via cutting)
+
+131. HADOOP-1431.  Fix sort progress reporting during map so that it runs
+     only while sorting, ensuring that stuck maps are correctly terminated.
+     (Devaraj Das and Arun C Murthy via cutting)
+
+132. HADOOP-1452.  Change TaskTracker.MapOutputServlet.doGet.totalRead
+     to a long, permitting map outputs to exceed 2^31 bytes.
+     (omalley via cutting)
+
+133. HADOOP-1443.  Fix a bug opening zero-length files in HDFS.
+     (Konstantin Shvachko via cutting)
+
+
+Release 0.12.3 - 2007-04-06
+
+ 1. HADOOP-1162.  Fix bug in record CSV and XML serialization of
+    binary values.  (Milind Bhandarkar via cutting)
+
+ 2. HADOOP-1123.  Fix NullPointerException in LocalFileSystem when
+    trying to recover from a checksum error.
+    (Hairong Kuang & Nigel Daley via tomwhite)
+
+ 3. HADOOP-1177.  Fix bug where IOException in MapOutputLocation.getFile
+    was not being logged.  (Devaraj Das via tomwhite)
+
+ 4. HADOOP-1175.  Fix bugs in JSP for displaying a task's log messages.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-1191.  Fix map tasks to wait until sort progress thread has
+    stopped before reporting the task done.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1192.  Fix an integer overflow bug in FSShell's 'dus'
+    command and a performance problem in HDFS's implementation of it.
+    (Hairong Kuang via cutting)
+
+ 7. HADOOP-1105. Fix reducers to make "progress" while iterating 
+    through values.  (Devaraj Das & Owen O'Malley via tomwhite)
+
+ 8. HADOOP-1179. Make Task Tracker close index file as soon as the read 
+    is done when serving get-map-output requests.  
+    (Devaraj Das via tomwhite)
+
+
+Release 0.12.2 - 2007-03-23
+
+ 1. HADOOP-1135.  Fix bug in block report processing which may cause
+    the namenode to delete blocks.  (Dhruba Borthakur via tomwhite)
+
+ 2. HADOOP-1145.  Make XML serializer and deserializer classes public
+    in record package.  (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1140.  Fix a deadlock in metrics. (David Bowen via cutting)
+
+ 4. HADOOP-1150.  Fix streaming -reducer and -mapper to give them
+    defaults. (Owen O'Malley via tomwhite)
+
+
+Release 0.12.1 - 2007-03-17
+
+ 1. HADOOP-1035.  Fix a StackOverflowError in FSDataSet.
+    (Raghu Angadi via cutting)
+
+ 2. HADOOP-1053.  Fix VInt representation of negative values.  Also
+    remove references in generated record code to methods outside of
+    the record package and improve some record documentation.
+    (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1067.  Compile fails if Checkstyle jar is present in lib
+    directory. Also remove dependency on a particular Checkstyle
+    version number. (tomwhite)
+
+ 4. HADOOP-1060.  Fix an IndexOutOfBoundsException in the JobTracker
+    that could cause jobs to hang.  (Arun C Murthy via cutting)
+
+ 5. HADOOP-1077.  Fix a race condition fetching map outputs that could
+    hang reduces.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1083.  Fix so that when a cluster restarts with a missing
+    datanode, its blocks are replicated.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-1082.  Fix a NullPointerException in ChecksumFileSystem.
+    (Hairong Kuang via cutting)
+
+ 8. HADOOP-1088.  Fix record serialization of negative values.
+    (Milind Bhandarkar via cutting)
+
+ 9. HADOOP-1080.  Fix bug in bin/hadoop on Windows when native
+    libraries are present.  (ab via cutting)
+
+10. HADOOP-1091.  Fix a NullPointerException in MetricsRecord.
+    (David Bowen via tomwhite)
+
+11. HADOOP-1092.  Fix a NullPointerException in HeartbeatMonitor
+    thread. (Hairong Kuang via tomwhite)
+
+12. HADOOP-1112.  Fix a race condition in Hadoop metrics.
+    (David Bowen via tomwhite)
+
+13. HADOOP-1108.  Checksummed file system should retry reading if a
+    different replica is found when handling ChecksumException.
+    (Hairong Kuang via tomwhite)
+
+14. HADOOP-1070.  Fix a problem with number of racks and datanodes
+    temporarily doubling.  (Konstantin Shvachko via tomwhite)
+
+15. HADOOP-1099.  Fix NullPointerException in JobInProgress.
+    (Gautam Kowshik via tomwhite)
+
+16. HADOOP-1115.  Fix bug where FsShell copyToLocal doesn't
+    copy directories.  (Hairong Kuang via tomwhite)
+
+17. HADOOP-1109.  Fix NullPointerException in StreamInputFormat.
+    (Koji Noguchi via tomwhite)
+
+18. HADOOP-1117.  Fix DFS scalability: when the namenode is
+    restarted it consumes 80% CPU. (Dhruba Borthakur via
+    tomwhite)
+
+19. HADOOP-1089.  Make the C++ version of write and read v-int
+    agree with the Java versions.  (Milind Bhandarkar via
+    tomwhite)
+
+20. HADOOP-1096.  Rename InputArchive and OutputArchive and
+    make them public. (Milind Bhandarkar via tomwhite)
+
+21. HADOOP-1128.  Fix missing progress information in map tasks.
+    (Espen Amble Kolstad, Andrzej Bialecki, and Owen O'Malley
+    via tomwhite)
+
+22. HADOOP-1129.  Fix DFSClient to not hide IOExceptions in
+    flush method.  (Hairong Kuang via tomwhite)
+
+23. HADOOP-1126.  Optimize CPU usage for under replicated blocks
+    when cluster restarts.  (Hairong Kuang via tomwhite)
+
+
+Release 0.12.0 - 2007-03-02
+
+ 1. HADOOP-975.  Separate stdout and stderr from tasks.
+    (Arun C Murthy via cutting)
+
+ 2. HADOOP-982.  Add some setters and a toString() method to
+    BytesWritable.  (omalley via cutting)
+
+ 3. HADOOP-858.  Move contrib/smallJobsBenchmark to src/test, removing
+    obsolete bits. (Nigel Daley via cutting)
+
+ 4. HADOOP-992.  Fix MiniMR unit tests to use MiniDFS when specified,
+    rather than the local FS.  (omalley via cutting)
+
+ 5. HADOOP-954.  Change use of metrics to use callback mechanism.
+    Also rename utility class Metrics to MetricsUtil.
+    (David Bowen & Nigel Daley via cutting)
+
+ 6. HADOOP-893.  Improve HDFS client's handling of dead datanodes.
+    The set is no longer reset with each block, but rather is now
+    maintained for the life of an open file.  (Raghu Angadi via cutting)
+
+ 7. HADOOP-882.  Upgrade to jets3t version 0.5, used by the S3
+    FileSystem.  This version supports retries.  (Michael Stack via cutting)
+
+ 8. HADOOP-977.  Send task's stdout and stderr to JobClient's stdout
+    and stderr respectively, with each line tagged by the task's name.
+    (Arun C Murthy via cutting)
+
+ 9. HADOOP-761.  Change unit tests to not use /tmp.  (Nigel Daley via cutting)
+
+10. HADOOP-1007. Make names of metrics used in Hadoop unique.
+    (Nigel Daley via cutting)
+
+11. HADOOP-491.  Change mapred.task.timeout to be per-job, and make a
+    value of zero mean no timeout.  Also change contrib/streaming to
+    disable task timeouts.  (Arun C Murthy via cutting)
+
+12. HADOOP-1010.  Add Reporter.NULL, a Reporter implementation that
+    does nothing.  (Runping Qi via cutting)
+
+13. HADOOP-923.  In HDFS NameNode, move replication computation to a
+    separate thread, to improve heartbeat processing time.
+    (Dhruba Borthakur via cutting) 
+
+14. HADOOP-476.  Rewrite contrib/streaming command-line processing,
+    improving parameter validation.  (Sanjay Dahiya via cutting)
+
+15. HADOOP-973.  Improve error messages in Namenode.  This should help
+    to track down a problem that was appearing as a
+    NullPointerException.  (Dhruba Borthakur via cutting) 
+
+16. HADOOP-649.  Fix so that jobs with no tasks are not lost.
+    (Thomas Friol via cutting)
+
+17. HADOOP-803.  Reduce memory use by HDFS namenode, phase I.
+    (Raghu Angadi via cutting)
+
+18. HADOOP-1021.  Fix MRCaching-based unit tests on Windows.
+    (Nigel Daley via cutting)
+
+19. HADOOP-889.  Remove duplicate code from HDFS unit tests.
+    (Milind Bhandarkar via cutting)
+
+20. HADOOP-943.  Improve HDFS's fsck command to display the filename
+    for under-replicated blocks.  (Dhruba Borthakur via cutting) 
+
+21. HADOOP-333.  Add validator for sort benchmark output.
+    (Arun C Murthy via cutting)
+
+22. HADOOP-947.  Improve performance of datanode decommissioning.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-442.  Permit one to specify hosts allowed to connect to
+    namenode and jobtracker with include and exclude files.  (Wendy
+    Chien via cutting)
+
+24. HADOOP-1017.  Cache constructors, for improved performance.
+    (Ron Bodkin via cutting)
+
+25. HADOOP-867.  Move split creation out of JobTracker to client.
+    Splits are now saved in a separate file, read by task processes
+    directly, so that user code is no longer required in the
+    JobTracker.  (omalley via cutting)
+
+26. HADOOP-1006.  Remove obsolete '-local' option from test code.
+    (Gautam Kowshik via cutting)
+
+27. HADOOP-952. Create a public (shared) Hadoop EC2 AMI.
+    The EC2 scripts now support launch of public AMIs.
+    (tomwhite)
+    
+28. HADOOP-1025. Remove some obsolete code in ipc.Server.  (cutting)
+
+29. HADOOP-997. Implement S3 retry mechanism for failed block
+    transfers. This includes a generic retry mechanism for use
+    elsewhere in Hadoop. (tomwhite)
+
+30. HADOOP-990.  Improve HDFS support for full datanode volumes.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-564.  Replace uses of "dfs://" URIs with the more standard
+    "hdfs://".  (Wendy Chien via cutting)
+
+32. HADOOP-1030.  In unit tests, unify setting of ipc.client.timeout.
+    Also increase the value used from one to two seconds, in hopes of
+    making tests complete more reliably.  (cutting)
+
+33. HADOOP-654.  Stop assigning tasks to a tasktracker if it has
+    failed more than a specified number in the job.
+    (Arun C Murthy via cutting)
+
+34. HADOOP-985.  Change HDFS to identify nodes by IP address rather
+    than by DNS hostname.  (Raghu Angadi via cutting)
+
+35. HADOOP-248.  Optimize location of map outputs to not use random
+    probes.  (Devaraj Das via cutting)
+
+36. HADOOP-1029.  Fix streaming's input format to correctly seek to
+    the start of splits.  (Arun C Murthy via cutting)
+
+37. HADOOP-492.  Add per-job and per-task counters.  These are
+    incremented via the Reporter interface and available through the
+    web ui and the JobClient API.  The mapreduce framework maintains a
+    few basic counters, and applications may add their own.  Counters
+    are also passed to the metrics system.
+    (David Bowen via cutting)
+
+38. HADOOP-1034.  Fix datanode to better log exceptions.
+    (Philippe Gassmann via cutting)
+
+39. HADOOP-878.  In contrib/streaming, fix reducer=NONE to work with
+    multiple maps.  (Arun C Murthy via cutting)
+
+40. HADOOP-1039.  In HDFS's TestCheckpoint, avoid restarting
+    MiniDFSCluster so often, speeding this test.  (Dhruba Borthakur via cutting)
+
+41. HADOOP-1040.  Update RandomWriter example to use counters and
+    user-defined input and output formats.  (omalley via cutting)
+
+42. HADOOP-1027.  Fix problems with in-memory merging during shuffle
+    and re-enable this optimization.  (Devaraj Das via cutting)
+
+43. HADOOP-1036.  Fix exception handling in TaskTracker to keep tasks
+    from being lost.  (Arun C Murthy via cutting)
+
+44. HADOOP-1042.  Improve the handling of failed map output fetches.
+    (Devaraj Das via cutting)
+
+45. HADOOP-928.  Make checksums optional per FileSystem.
+    (Hairong Kuang via cutting)
+
+46. HADOOP-1044.  Fix HDFS's TestDecommission to not spuriously fail.
+    (Wendy Chien via cutting)
+
+47. HADOOP-972.  Optimize HDFS's rack-aware block placement algorithm.
+    (Hairong Kuang via cutting)
+
+48. HADOOP-1043.  Optimize shuffle, increasing parallelism.
+    (Devaraj Das via cutting)
+
+49. HADOOP-940.  Improve HDFS's replication scheduling.
+    (Dhruba Borthakur via cutting) 
+
+50. HADOOP-1020.  Fix a bug in Path resolution, and a problem with unit
+    tests on Windows.  (cutting)
+
+51. HADOOP-941.  Enhance record facility.
+    (Milind Bhandarkar via cutting)
+
+52. HADOOP-1000.  Fix so that log messages in task subprocesses are
+    not written to a task's standard error.  (Arun C Murthy via cutting)
+
+53. HADOOP-1037.  Fix bin/slaves.sh, which currently only works with
+    /bin/bash, to specify /bin/bash rather than /bin/sh.  (cutting)
+
+54. HADOOP-1046. Clean up partially received stale block files from tmp. (ab)
+
+55. HADOOP-1041.  Optimize mapred counter implementation.  Also group
+    counters by their declaring Enum.  (David Bowen via cutting)
+
+56. HADOOP-1032.  Permit one to specify jars that will be cached
+    across multiple jobs.  (Gautam Kowshik via cutting)
+
+57. HADOOP-1051.  Add optional checkstyle task to build.xml.  To use
+    this, developers must download the (LGPL'd) checkstyle jar
+    themselves.  (tomwhite via cutting)
+
+58. HADOOP-1049.  Fix a race condition in IPC client.
+    (Devaraj Das via cutting)
+
+60. HADOOP-1056.  Check HDFS include/exclude node lists with both IP
+    address and hostname.  (Wendy Chien via cutting)
+
+61. HADOOP-994.  In HDFS, limit the number of blocks invalidated at
+    once.  Large lists were causing datanodes to time out.
+    (Dhruba Borthakur via cutting) 
+
+62. HADOOP-432.  Add a trash feature, disabled by default.  When
+    enabled, the FSShell 'rm' command will move things to a trash
+    directory in the filesystem.  In HDFS, a thread periodically
+    checkpoints the trash and removes old checkpoints.  (cutting)
+
+
+Release 0.11.2 - 2007-02-16
+
+ 1. HADOOP-1009.  Fix an infinite loop in the HDFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-1014.  Disable in-memory merging during shuffle, as this is
+    causing data corruption.  (Devaraj Das via cutting)
+
+
+Release 0.11.1 - 2007-02-09
+
+ 1. HADOOP-976.  Make SequenceFile.Metadata public.  (Runping Qi via cutting)
+
+ 2. HADOOP-917.  Fix a NullPointerException in SequenceFile's merger
+    with large map outputs.  (omalley via cutting)
+
+ 3. HADOOP-984.  Fix a bug in shuffle error handling introduced by
+    HADOOP-331.  If a map output is unavailable, the job tracker is
+    once more informed.  (Arun C Murthy via cutting)
+
+ 4. HADOOP-987.  Fix a problem in HDFS where blocks were not removed
+    from neededReplications after a replication target was selected.
+    (Hairong Kuang via cutting)
+
+Release 0.11.0 - 2007-02-02
+
+ 1. HADOOP-781.  Remove methods deprecated in 0.10 that are no longer
+    widely used.  (cutting)
+
+ 2. HADOOP-842.  Change HDFS protocol so that the open() method is
+    passed the client hostname, to permit the namenode to order block
+    locations on the basis of network topology.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-852.  Add an ant task to compile record definitions, and
+    use it to compile record unit tests.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-757.  Fix "Bad File Descriptor" exception in HDFS client
+    when an output file is closed twice.  (Raghu Angadi via cutting)
+
+ 5. [ intentionally blank ]
+
+ 6. HADOOP-890.  Replace dashes in metric names with underscores,
+    for better compatibility with some monitoring systems.
+    (Nigel Daley via cutting)
+
+ 7. HADOOP-801.  Add to jobtracker a log of task completion events.
+    (Sanjay Dahiya via cutting)
+
+ 8. HADOOP-855.  In HDFS, try to repair files with checksum errors.
+    An exception is still thrown, but corrupt blocks are now removed
+    when they have replicas.  (Wendy Chien via cutting)
+
+ 9. HADOOP-886.  Reduce number of timer threads created by metrics API
+    by pooling contexts.  (Nigel Daley via cutting)
+
+10. HADOOP-897.  Add a "javac.args" property to build.xml that permits
+    one to pass arbitrary options to javac. (Milind Bhandarkar via cutting)
+
+11. HADOOP-899.  Update libhdfs for changes in HADOOP-871.
+    (Sameer Paranjpye via cutting)
+
+12. HADOOP-905.  Remove some dead code from JobClient.  (cutting)
+
+13. HADOOP-902.  Fix a NullPointerException in HDFS client when
+    closing output streams.  (Raghu Angadi via cutting)
+
+14. HADOOP-735.  Switch generated record code to use BytesWritable to
+    represent fields of type 'buffer'. (Milind Bhandarkar via cutting)
+
+15. HADOOP-830.  Improve mapreduce merge performance by buffering and
+    merging multiple map outputs as they arrive at reduce nodes before
+    they're written to disk.  (Devaraj Das via cutting)
+
+16. HADOOP-908.  Add a new contrib package, Abacus, that simplifies
+    counting and aggregation, built on MapReduce.  (Runping Qi via cutting)
+
+17. HADOOP-901.  Add support for recursive renaming to the S3 filesystem.
+    (Tom White via cutting)
+
+18. HADOOP-912.  Fix a bug in TaskTracker.isIdle() that was
+    sporadically causing unit test failures.  (Arun C Murthy via cutting)
+
+19. HADOOP-909.  Fix the 'du' command to correctly compute the size of
+    FileSystem directory trees.  (Hairong Kuang via cutting)
+
+20. HADOOP-731.  When a checksum error is encountered on a file stored
+    in HDFS, try another replica of the data, if any.
+    (Wendy Chien via cutting)
+
+21. HADOOP-732.  Add support to SequenceFile for arbitrary metadata,
+    as a set of attribute value pairs.  (Runping Qi via cutting)
+
+22. HADOOP-929.  Fix PhasedFileSystem to pass configuration to
+    underlying FileSystem.  (Sanjay Dahiya via cutting)
+
+23. HADOOP-935.  Fix contrib/abacus to not delete pre-existing output
+    files, but rather to fail in this case.  (Runping Qi via cutting)
+
+24. HADOOP-936.  More metric renamings, as in HADOOP-890.
+    (Nigel Daley via cutting)
+
+25. HADOOP-856.  Fix HDFS's fsck command to not report that
+    non-existent filesystems are healthy.  (Milind Bhandarkar via cutting)
+
+26. HADOOP-602.  Remove the dependency on Lucene's PriorityQueue
+    utility, by copying it into Hadoop.  This facilitates using Hadoop
+    with different versions of Lucene without worrying about CLASSPATH
+    order.  (Milind Bhandarkar via cutting)
+
+27. [ intentionally blank ]
+
+28. HADOOP-227.  Add support for backup namenodes, which periodically
+    get snapshots of the namenode state.  (Dhruba Borthakur via cutting) 
+
+29. HADOOP-884.  Add scripts in contrib/ec2 to facilitate running
+    Hadoop on an Amazon's EC2 cluster.  (Tom White via cutting)
+
+30. HADOOP-937.  Change the namenode to request re-registration of
+    datanodes in more circumstances.  (Hairong Kuang via cutting)
+
+31. HADOOP-922.  Optimize small forward seeks in HDFS.  If data is
+    likely already in flight, skip ahead rather than re-opening the
+    block.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-961.  Add a 'job -events' sub-command that prints job
+    events, including task completions and failures.  (omalley via cutting)
+
+33. HADOOP-959.  Fix namenode snapshot code added in HADOOP-227 to
+    work on Windows.  (Dhruba Borthakur via cutting)
+
+34. HADOOP-934.  Fix TaskTracker to catch metrics exceptions that were
+    causing heartbeats to fail.  (Arun Murthy via cutting)
+
+35. HADOOP-881.  Fix JobTracker web interface to display the correct
+    number of task failures.  (Sanjay Dahiya via cutting)
+
+36. HADOOP-788.  Change contrib/streaming to subclass TextInputFormat,
+    permitting it to take advantage of native compression facilities.
+    (Sanjay Dahiya via cutting)
+
+37. HADOOP-962.  In contrib/ec2: make scripts executable in tar file;
+    add a README; make the environment file use a template.
+    (Tom White via cutting)
+
+38. HADOOP-549.  Fix a NullPointerException in TaskReport's
+    serialization.  (omalley via cutting)
+
+39. HADOOP-963.  Fix remote exceptions to have the stack trace of the
+    caller thread, not the IPC listener thread.  (omalley via cutting)
+
+40. HADOOP-967.  Change RPC clients to start sending a version header.
+    (omalley via cutting)
+
+41. HADOOP-964.  Fix a bug introduced by HADOOP-830 where jobs failed
+    whose comparators and/or i/o types were in the job's jar.
+    (Dennis Kubes via cutting)
+
+42. HADOOP-969.  Fix a deadlock in JobTracker.  (omalley via cutting)
+
+43. HADOOP-862.  Add support for the S3 FileSystem to the CopyFiles
+    tool.  (Michael Stack via cutting)
+
+44. HADOOP-965.  Fix IsolationRunner so that job's jar can be found.
+    (Dennis Kubes via cutting)
+
+45. HADOOP-309.  Fix two NullPointerExceptions in StatusHttpServer.
+    (navychen via cutting)
+
+46. HADOOP-692.  Add rack awareness to HDFS's placement of blocks.
+    (Hairong Kuang via cutting)
+
+
+Release 0.10.1 - 2007-01-10
+
+ 1. HADOOP-857.  Fix S3 FileSystem implementation to permit its use
+    for MapReduce input and output.  (Tom White via cutting)
+
+ 2. HADOOP-863.  Reduce logging verbosity introduced by HADOOP-813.
+    (Devaraj Das via cutting)
+
+ 3. HADOOP-815.  Fix memory leaks in JobTracker. (Arun C Murthy via cutting)
+
+ 4. HADOOP-600.  Fix a race condition in JobTracker.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-864.  Fix 'bin/hadoop -jar' to operate correctly when
+    hadoop.tmp.dir does not yet exist.  (omalley via cutting)
+
+ 6. HADOOP-866.  Fix 'dfs -get' command to remove existing crc files,
+    if any.  (Milind Bhandarkar via cutting)
+
+ 7. HADOOP-871.  Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH.
+    (Arun C Murthy via cutting)
+
+ 8. HADOOP-868.  Decrease the number of open files during map,
+    respecting io.sort.factor.  (Devaraj Das via cutting)
+
+ 9. HADOOP-865.  Fix S3 FileSystem so that partially created files can
+    be deleted.  (Tom White via cutting)
+
+10. HADOOP-873.	 Pass java.library.path correctly to child processes.
+    (omalley via cutting)
+
+11. HADOOP-851.  Add support for the LZO codec.  This is much faster
+    than the default, zlib-based compression, but it is only available
+    when the native library is built.  (Arun C Murthy via cutting)
+
+12. HADOOP-880.  Fix S3 FileSystem to remove directories.
+    (Tom White via cutting)
+
+13. HADOOP-879.  Fix InputFormatBase to handle output generated by
+    MapFileOutputFormat.  (cutting)
+
+14. HADOOP-659.  In HDFS, prioritize replication of blocks based on
+    current replication level.  Blocks which are severely
+    under-replicated should be further replicated before blocks which
+    are less under-replicated.  (Hairong Kuang via cutting)
+
+15. HADOOP-726.  Deprecate FileSystem locking methods.  They are not
+    currently usable.  Locking should eventually be provided as an
+    independent service.  (Raghu Angadi via cutting)
+
+16. HADOOP-758.  Fix exception handling during reduce so that root
+    exceptions are not masked by exceptions in cleanups.
+    (Raghu Angadi via cutting)
+
+
+Release 0.10.0 - 2007-01-05
+
+ 1. HADOOP-763. Change DFS namenode benchmark to not use MapReduce.
+    (Nigel Daley via cutting)
+
+ 2. HADOOP-777. Use fully-qualified hostnames for tasktrackers and
+    datanodes.  (Mahadev Konar via cutting)
+
+ 3. HADOOP-621. Change 'dfs -cat' to exit sooner when output has been
+    closed.  (Dhruba Borthakur via cutting) 
+
+ 4. HADOOP-752. Rationalize some synchronization in DFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-629. Fix RPC services to better check the protocol name and
+    version.  (omalley via cutting)
+
+ 6. HADOOP-774. Limit the number of invalid blocks returned with
+    heartbeats by the namenode to datanodes.  Transmitting and
+    processing very large invalid block lists can tie up both the
+    namenode and datanode for too long.  (Dhruba Borthakur via cutting) 
+
+ 7. HADOOP-738. Change 'dfs -get' command to not create CRC files by
+    default, adding a -crc option to force their creation.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-676. Improved exceptions and error messages for common job
+    input specification errors.  (Sanjay Dahiya via cutting)
+
+ 9. [Included in 0.9.2 release]
+
+10. HADOOP-756. Add new dfsadmin option to wait for filesystem to be
+    operational.  (Dhruba Borthakur via cutting)
+
+11. HADOOP-770. Fix jobtracker web interface to display, on restart,
+    jobs that were running when it was last stopped.
+    (Sanjay Dahiya via cutting)
+
+12. HADOOP-331. Write all map outputs to a single file with an index,
+    rather than to a separate file per reduce task.  This should both
+    speed the shuffle and make things more scalable.
+    (Devaraj Das via cutting)
+
+13. HADOOP-818. Fix contrib unit tests to not depend on core unit
+    tests.  (omalley via cutting)
+
+14. HADOOP-786. Log common exception at debug level.
+    (Sanjay Dahiya via cutting)
+
+15. HADOOP-796. Provide more convenient access to failed task
+    information in the web interface.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-764. Reduce memory allocations in namenode some.
+    (Dhruba Borthakur via cutting) 
+
+17. HADOOP-802. Update description of mapred.speculative.execution to
+    mention reduces.  (Nigel Daley via cutting)
+
+18. HADOOP-806. Include link to datanodes on front page of namenode
+    web interface.  (Raghu Angadi via cutting)
+
+19. HADOOP-618.  Make JobSubmissionProtocol public.
+    (Arun C Murthy via cutting)
+
+20. HADOOP-782.  Fully remove killed tasks.  (Arun C Murthy via cutting)
+
+21. HADOOP-792.  Fix 'dfs -mv' to return correct status.
+    (Dhruba Borthakur via cutting) 
+
+22. HADOOP-673.  Give each task its own working directory again.
+    (Mahadev Konar via cutting)
+
+23. HADOOP-571.  Extend the syntax of Path to be a URI; to be
+    optionally qualified with a scheme and authority.  The scheme
+    determines the FileSystem implementation, while the authority
+    determines the FileSystem instance.  New FileSystem
+    implementations may be provided by defining an fs.<scheme>.impl
+    property, naming the FileSystem implementation class.  This
+    permits easy integration of new FileSystem implementations.
+    (cutting)
+
+24. HADOOP-720.  Add an HDFS white paper to website.
+    (Dhruba Borthakur via cutting) 
+
+25. HADOOP-794.  Fix a divide-by-zero exception when a job specifies
+    zero map tasks.  (omalley via cutting)
+
+26. HADOOP-454.  Add a 'dfs -dus' command that provides summary disk
+    usage.  (Hairong Kuang via cutting)
+
+27. HADOOP-574.  Add an Amazon S3 implementation of FileSystem.  To
+    use this, one need only specify paths of the form
+    s3://id:secret@bucket/.  Alternately, the AWS access key id and
+    secret can be specified in your config, with the properties
+    fs.s3.awsAccessKeyId and fs.s3.awsSecretAccessKey.
+    (Tom White via cutting)
+
+28. HADOOP-824.  Rename DFSShell to be FsShell, since it applies
+    generically to all FileSystem implementations.  (cutting)
+
+29. HADOOP-813.  Fix map output sorting to report progress, so that
+    sorts which take longer than the task timeout do not fail.
+    (Devaraj Das via cutting)
+
+30. HADOOP-825.  Fix HDFS daemons when configured with new URI syntax.
+    (omalley via cutting)
+
+31. HADOOP-596.  Fix a bug in phase reporting during reduce.
+    (Sanjay Dahiya via cutting)
+
+32. HADOOP-811.  Add a utility, MultithreadedMapRunner.
+    (Alejandro Abdelnur via cutting)
+
+33. HADOOP-829.  Within HDFS, clearly separate three different
+    representations for datanodes: one for RPCs, one for
+    namenode-internal use, and one for namespace persistence.
+    (Dhruba Borthakur via cutting) 
+
+34. HADOOP-823.  Fix problem starting datanode when not all configured
+    data directories exist.  (Bryan Pendleton via cutting)
+
+35. HADOOP-451.  Add a Split interface.  CAUTION: This incompatibly
+    changes the InputFormat and RecordReader interfaces.  Not only is
+    FileSplit replaced with Split, but a FileSystem parameter is no
+    longer passed in several methods, input validation has changed,
+    etc.  (omalley via cutting)
+
+36. HADOOP-814.  Optimize locking in namenode. (Dhruba Borthakur via cutting) 
+
+37. HADOOP-738.  Change 'fs -put' and 'fs -get' commands to accept
+    standard input and output, respectively.  Standard i/o is
+    specified by a file named '-'.  (Wendy Chien via cutting)
+
+38. HADOOP-835.  Fix a NullPointerException reading record-compressed
+    SequenceFiles.  (Hairong Kuang via cutting)
+
+39. HADOOP-836.  Fix a MapReduce bug on Windows, where the wrong
+    FileSystem was used.  Also add a static FileSystem.getLocal()
+    method and better Path checking in HDFS, to help avoid such issues
+    in the future.  (omalley via cutting)
+
+40. HADOOP-837.  Improve RunJar utility to unpack the jar file into
+    hadoop.tmp.dir, rather than the system temporary directory.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-841.  Fix native library to build 32-bit version even when
+    on a 64-bit host, if a 32-bit JVM is used.  (Arun C Murthy via cutting)
+
+42. HADOOP-838.  Fix tasktracker to pass java.library.path to
+    sub-processes, so that libhadoop.a is found.
+    (Arun C Murthy via cutting)
+
+43. HADOOP-844.  Send metrics messages on a fixed-delay schedule
+    instead of a fixed-rate schedule.  (David Bowen via cutting)
+
+44. HADOOP-849.  Fix OutOfMemory exceptions in TaskTracker due to a
+    file handle leak in SequenceFile.  (Devaraj Das via cutting)
+
+45. HADOOP-745.  Fix a synchronization bug in the HDFS namenode.
+    (Dhruba Borthakur via cutting)
+
+46. HADOOP-850.  Add Writable implementations for variable-length
+    integers.  (ab via cutting)
+
+47. HADOOP-525.  Add raw comparators to record types.  This greatly
+    improves record sort performance.  (Milind Bhandarkar via cutting)
+
+48. HADOOP-628.  Fix a problem with 'fs -cat' command, where some
+    characters were replaced with question marks.  (Wendy Chien via cutting)
+
+49. HADOOP-804.  Reduce verbosity of MapReduce logging.
+    (Sanjay Dahiya via cutting)
+
+50. HADOOP-853.  Rename 'site' to 'docs', in preparation for inclusion
+    in releases.  (cutting)
+
+51. HADOOP-371.  Include contrib jars and site documentation in
+    distributions.  Also add contrib and example documentation to
+    distributed javadoc, in separate sections.  (Nigel Daley via cutting)
+
+52. HADOOP-846.  Report progress during entire map, as sorting of
+    intermediate outputs may happen at any time, potentially causing
+    task timeouts.  (Devaraj Das via cutting)
+
+53. HADOOP-840.  In task tracker, queue task cleanups and perform them
+    in a separate thread.  (omalley & Mahadev Konar via cutting)
+
+54. HADOOP-681.  Add to HDFS the ability to decommission nodes.  This
+    causes their blocks to be re-replicated on other nodes, so that
+    they may be removed from a cluster.  (Dhruba Borthakur via cutting)
+
+55. HADOOP-470.  In HDFS web ui, list the datanodes containing each
+    copy of a block.  (Hairong Kuang via cutting)
+
+56. HADOOP-700.  Change bin/hadoop to only include core jar file on
+    classpath, not example, test, etc.  Also rename core jar to
+    hadoop-${version}-core.jar so that it can be more easily
+    identified.  (Nigel Daley via cutting)
+
+57. HADOOP-619.  Extend InputFormatBase to accept individual files and
+    glob patterns as MapReduce inputs, not just directories.  Also
+    change contrib/streaming to use this.  (Sanjay Dahiya via cutting)
+
+
+Release 0.9.2 - 2006-12-15
+
+ 1. HADOOP-639. Restructure InterTrackerProtocol to make task
+    accounting more reliable.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-827. Turn off speculative execution by default, since it's
+    currently broken.  (omalley via cutting)
+
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+    (Mahadev Konar via cutting)
+
+
+Release 0.9.1 - 2006-12-06
+
+ 1. HADOOP-780. Use ReflectionUtils to instantiate key and value
+    objects. (ab)
+
+ 2. HADOOP-779. Fix contrib/streaming to work correctly with gzipped
+    input files.  (Hairong Kuang via cutting)
+
+
+Release 0.9.0 - 2006-12-01
+
+ 1. HADOOP-655.  Remove most deprecated code.  A few deprecated things
+    remain, notably UTF8 and some methods that are still required.
+    Also cleaned up constructors for SequenceFile, MapFile, SetFile,
+    and ArrayFile a bit.  (cutting)
+
+ 2. HADOOP-565.  Upgrade to Jetty version 6. (Sanjay Dahiya via cutting)
+
+ 3. HADOOP-682.  Fix DFS format command to work correctly when
+    configured with a non-existent directory. (Sanjay Dahiya via cutting)
+
+ 4. HADOOP-645.  Fix a bug in contrib/streaming when -reducer is NONE.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-687.  Fix a classpath bug in bin/hadoop that blocked the
+    servers from starting. (Sameer Paranjpye via omalley)
+
+ 6. HADOOP-683.  Remove a script dependency on bash, so it works with
+    dash, the new default for /bin/sh on Ubuntu.  (James Todd via cutting)
+
+ 7. HADOOP-382.  Extend unit tests to run multiple datanodes.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-604.  Fix some synchronization issues and a
+    NullPointerException in DFS datanode.  (Raghu Angadi via cutting)
+
+ 9. HADOOP-459.  Fix memory leaks and a host of other issues with
+    libhdfs.  (Sameer Paranjpye via cutting)
+
+10. HADOOP-694.  Fix a NullPointerException in jobtracker.
+    (Mahadev Konar via cutting)
+
+11. HADOOP-637.  Fix a memory leak in the IPC server.  Direct buffers
+    are not collected like normal buffers, and provided little
+    advantage.  (Raghu Angadi via cutting)
+
+12. HADOOP-696.  Fix TestTextInputFormat unit test to not rely on the
+    order of directory listings.  (Sameer Paranjpye via cutting)
+
+13. HADOOP-611.  Add support for iterator-based merging to
+    SequenceFile.  (Devaraj Das via cutting)
+
+14. HADOOP-688.  Move DFS administrative commands to a separate
+    command named 'dfsadmin'.  (Dhruba Borthakur via cutting) 
+
+15. HADOOP-708.  Fix test-libhdfs to return the correct status, so
+    that failures will break the build.  (Nigel Daley via cutting)
+
+16. HADOOP-646.  Fix namenode to handle edits files larger than 2GB.
+    (Milind Bhandarkar via cutting)
+
+17. HADOOP-705.  Fix a bug in the JobTracker when failed jobs were
+    not completely cleaned up.  (Mahadev Konar via cutting)
+
+18. HADOOP-613.  Perform final merge while reducing.  This removes one
+    sort pass over the data and should consequently significantly
+    decrease overall processing time.  (Devaraj Das via cutting)
+
+19. HADOOP-661.  Make each job's configuration visible through the web
+    ui.  (Arun C Murthy via cutting)
+
+20. HADOOP-489.  In MapReduce, separate user logs from system logs.
+    Each task's log output is now available through the web ui.  (Arun
+    C Murthy via cutting)
+
+21. HADOOP-712.  Fix record io's xml serialization to correctly handle
+    control-characters.  (Milind Bhandarkar via cutting)
+
+22. HADOOP-668.  Improvements to the web-based DFS browser.
+    (Hairong Kuang via cutting)
+
+23. HADOOP-715.  Fix build.xml so that test logs are written in build
+    directory, rather than in CWD.  (Arun C Murthy via cutting)
+
+24. HADOOP-538.  Add support for building an optional native library,
+    libhadoop.so, that improves the performance of zlib-based
+    compression.  To build this, specify -Dcompile.native to Ant.
+    (Arun C Murthy via cutting)
+
+25. HADOOP-610.  Fix a problem when the DFS block size is configured
+    to be smaller than the buffer size, typically only when debugging.
+    (Milind Bhandarkar via cutting)
+
+26. HADOOP-695.  Fix a NullPointerException in contrib/streaming.
+    (Hairong Kuang via cutting)
+
+27. HADOOP-652.  In DFS, when a file is deleted, the block count is
+    now decremented.  (Vladimir Krokhmalyov via cutting)
+
+28. HADOOP-725.  In DFS, optimize block placement algorithm,
+    previously a performance bottleneck.  (Milind Bhandarkar via cutting)
+
+29. HADOOP-723.  In MapReduce, fix a race condition during the
+    shuffle, which resulted in FileNotFoundExceptions.  (omalley via cutting)
+
+30. HADOOP-447.  In DFS, fix getBlockSize(Path) to work with relative
+    paths.  (Raghu Angadi via cutting)
+
+31. HADOOP-733.  Make exit codes in DFShell consistent and add a unit
+    test.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-709.  Fix contrib/streaming to work with commands that
+    contain control characters.  (Dhruba Borthakur via cutting)
+
+33. HADOOP-677.  In IPC, permit a version header to be transmitted
+    when connections are established.  This will permit us to change
+    the format of IPC requests back-compatibly in subsequent releases.
+    (omalley via cutting)
+
+34. HADOOP-699.  Fix DFS web interface so that filesystem browsing
+    works correctly, using the right port number.  Also add support
+    for sorting datanode list by various columns.
+    (Raghu Angadi via cutting)
+
+35. HADOOP-76.  Implement speculative reduce.  Now when a job is
+    configured for speculative execution, both maps and reduces will
+    execute speculatively.  Reduce outputs are written to temporary
+    location and moved to the final location when reduce is complete.
+    (Sanjay Dahiya via cutting)
+
+36. HADOOP-736.  Roll back to Jetty 5.1.4, due to performance problems
+    with Jetty 6.0.1.
+
+37. HADOOP-739.  Fix TestIPC to use different port number, making it
+    more reliable.  (Nigel Daley via cutting)
+
+38. HADOOP-749.  Fix a NullPointerException in jobfailures.jsp.
+    (omalley via cutting)
+
+39. HADOOP-747.  Fix record serialization to work correctly when
+    records are embedded in Maps.  (Milind Bhandarkar via cutting)
+
+40. HADOOP-698.  Fix HDFS client not to retry the same datanode on
+    read failures.  (Milind Bhandarkar via cutting)
+
+41. HADOOP-689. Add GenericWritable, to facilitate polymorphism in
+    MapReduce, SequenceFile, etc. (Feng Jiang via cutting)
+
+42. HADOOP-430.  Stop datanode's HTTP server when registration with
+    namenode fails.  (Wendy Chien via cutting)
+
+43. HADOOP-750.  Fix a potential race condition during mapreduce
+    shuffle.  (omalley via cutting)
+
+44. HADOOP-728.  Fix contrib/streaming-related issues, including
+    '-reducer NONE'.  (Sanjay Dahiya via cutting)
+
+
+Release 0.8.0 - 2006-11-03
+
+ 1. HADOOP-477.  Extend contrib/streaming to scan the PATH environment
+    variables when resolving executable program names.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-583.  In DFSClient, reduce the log level of re-connect
+    attempts from 'info' to 'debug', so they are not normally shown.
+    (Konstantin Shvachko via cutting)
+
+ 3. HADOOP-498.  Re-implement DFS integrity checker to run server-side,
+    for much improved performance.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-586.  Use the jar name for otherwise un-named jobs.
+    (Sanjay Dahiya via cutting)
+
+ 5. HADOOP-514.  Make DFS heartbeat interval configurable.
+    (Milind Bhandarkar via cutting)
+
+ 6. HADOOP-588.  Fix logging and accounting of failed tasks.
+    (Sanjay Dahiya via cutting)
+
+ 7. HADOOP-462.  Improve command line parsing in DFSShell, so that
+    incorrect numbers of arguments result in informative errors rather
+    than ArrayIndexOutOfBoundsException.  (Dhruba Borthakur via cutting)
+
+ 8. HADOOP-561.  Fix DFS so that one replica of each block is written
+    locally, if possible.  This was the intent, but there was a bug.
+    (Dhruba Borthakur via cutting) 
+
+ 9. HADOOP-610.  Fix TaskTracker to survive more exceptions, keeping
+    tasks from becoming lost.  (omalley via cutting)
+
+10. HADOOP-625.  Add a servlet to all http daemons that displays a
+    stack dump, useful for debugging.  (omalley via cutting)
+
+11. HADOOP-554.  Fix DFSShell to return -1 for errors.
+    (Dhruba Borthakur via cutting) 
+
+12. HADOOP-626.  Correct the documentation in the NNBench example
+    code, and also remove a mistaken call there.
+    (Nigel Daley via cutting)
+
+13. HADOOP-634.  Add missing license to many files.
+    (Nigel Daley via cutting)
+
+14. HADOOP-627.  Fix some synchronization problems in MiniMRCluster
+    that sometimes caused unit tests to fail.  (Nigel Daley via cutting)
+
+15. HADOOP-563.  Improve the NameNode's lease policy so that leases
+    are held for one hour without renewal (instead of one minute).
+    However another attempt to create the same file will still succeed
+    if the lease has not been renewed within a minute.  This prevents
+    communication or scheduling problems from causing a write to fail
+    for up to an hour, barring some other process trying to create the
+    same file.  (Dhruba Borthakur via cutting)
+
+16. HADOOP-635.  In DFSShell, permit specification of multiple files
+    as the source for file copy and move commands.
+    (Dhruba Borthakur via cutting)
+
+17. HADOOP-641.  Change NameNode to request a fresh block report from
+    a re-discovered DataNode, so that no-longer-needed replications
+    are stopped promptly.  (Konstantin Shvachko via cutting)
+
+18. HADOOP-642.  Change IPC client to specify an explicit connect
+    timeout.  (Konstantin Shvachko via cutting)
+
+19. HADOOP-638.  Fix an unsynchronized access to TaskTracker's
+    internal state.  (Nigel Daley via cutting)
+
+20. HADOOP-624.  Fix servlet path to stop a Jetty warning on startup.
+    (omalley via cutting)
+
+21. HADOOP-578.  Failed tasks are no longer placed at the end of the
+    task queue.  This was originally done to work around other
+    problems that have now been fixed.  Re-executing failed tasks
+    sooner causes buggy jobs to fail faster.  (Sanjay Dahiya via cutting)
+
+22. HADOOP-658.  Update source file headers per Apache policy.  (cutting)
+
+23. HADOOP-636.  Add MapFile & ArrayFile constructors which accept a
+    Progressable, and pass it down to SequenceFile.  This permits
+    reduce tasks which use MapFile to still report progress while
+    writing blocks to the filesystem.  (cutting)
+
+24. HADOOP-576.  Enable contrib/streaming to use the file cache.  Also
+    extend the cache to permit symbolic links to cached items, rather
+    than local file copies.  (Mahadev Konar via cutting)
+
+25. HADOOP-482.  Fix unit tests to work when a cluster is running on
+    the same machine, removing port conflicts.  (Wendy Chien via cutting)
+
+26. HADOOP-90.  Permit dfs.name.dir to list multiple directories,
+    where namenode data is to be replicated. (Milind Bhandarkar via cutting)
+
+27. HADOOP-651.  Fix DFSCk to correctly pass parameters to the servlet
+    on the namenode.  (Milind Bhandarkar via cutting)
+
+28. HADOOP-553.  Change main() routines of DataNode and NameNode to
+    log exceptions rather than letting the JVM print them to standard
+    error.  Also, change the hadoop-daemon.sh script to rotate
+    standard i/o log files.  (Raghu Angadi via cutting)
+
+29. HADOOP-399.  Fix javadoc warnings.  (Nigel Daley via cutting)
+
+30. HADOOP-599.  Fix web ui and command line to correctly report DFS
+    filesystem size statistics.  Also improve web layout.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-660.  Permit specification of junit test output format.
+    (Nigel Daley via cutting)
+
+32. HADOOP-663.  Fix a few unit test issues.  (Mahadev Konar via cutting)
+
+33. HADOOP-664.  Cause entire build to fail if libhdfs tests fail.
+    (Nigel Daley via cutting)
+
+34. HADOOP-633.  Keep jobtracker from dying when job initialization
+    throws exceptions.  Also improve exception handling in a few other
+    places and add more informative thread names.
+    (omalley via cutting)
+
+35. HADOOP-669.  Fix a problem introduced by HADOOP-90 that can cause
+    DFS to lose files.  (Milind Bhandarkar via cutting)
+
+36. HADOOP-373.  Consistently check the value returned by
+    FileSystem.mkdirs().  (Wendy Chien via cutting)
+
+37. HADOOP-670.  Code cleanups in some DFS internals: use generic
+    types, replace Vector with ArrayList, etc.
+    (Konstantin Shvachko via cutting)
+
+38. HADOOP-647.  Permit map outputs to use a different compression
+    type than the job output.  (omalley via cutting)
+
+39. HADOOP-671.  Fix file cache to check for pre-existence before
+    creating it.  (Mahadev Konar via cutting)
+
+40. HADOOP-665.  Extend many DFSShell commands to accept multiple
+    arguments.  Now commands like "ls", "rm", etc. will operate on
+    multiple files.  (Dhruba Borthakur via cutting)
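+
+    For example (the paths here are illustrative, not taken from the
+    release notes), several files can now be handled in one call:
+
+      bin/hadoop dfs -rm /tmp/a.txt /tmp/b.txt /tmp/c.txt
+      bin/hadoop dfs -ls /user/alice /user/bob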
+
+
+Release 0.7.2 - 2006-10-18
+
+ 1. HADOOP-607.  Fix a bug where classes included in job jars were not
+    found by tasks.  (Mahadev Konar via cutting)
+
+ 2. HADOOP-609.  Add a unit test that checks that classes in job jars
+    can be found by tasks.  Also modify unit tests to specify multiple
+    local directories.  (Mahadev Konar via cutting)
+
+
+Release 0.7.1 - 2006-10-11
+
+ 1. HADOOP-593.  Fix a NullPointerException in the JobTracker.
+    (omalley via cutting)
+
+ 2. HADOOP-592.  Fix a NullPointerException in the IPC Server.  Also
+    consistently log when stale calls are discarded.  (omalley via cutting)
+
+ 3. HADOOP-594.  Increase the DFS safe-mode threshold from .95 to
+    .999, so that nearly all blocks must be reported before filesystem
+    modifications are permitted.  (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-598.  Fix tasks to retry when reporting completion, so that
+    a single RPC timeout won't fail a task.  (omalley via cutting)
+
+ 5. HADOOP-597.  Fix TaskTracker to not discard map outputs for errors
+    in transmitting them to reduce nodes.  (omalley via cutting)
+
+
+Release 0.7.0 - 2006-10-06
+
+ 1. HADOOP-243.  Fix rounding in the display of task and job progress
+    so that things are not shown to be 100% complete until they are in
+    fact finished.  (omalley via cutting) 
+
+ 2. HADOOP-438.  Limit the length of absolute paths in DFS, since the
+    file format used to store pathnames has some limitations.
+    (Wendy Chien via cutting)
+
+ 3. HADOOP-530.  Improve error messages in SequenceFile when keys or
+    values are of the wrong type.  (Hairong Kuang via cutting)
+
+ 4. HADOOP-288.  Add a file caching system and use it in MapReduce to
+    cache job jar files on slave nodes.  (Mahadev Konar via cutting)
+
+ 5. HADOOP-533.  Fix unit test to not modify conf directory.
+   (Hairong Kuang via cutting)
+
+ 6. HADOOP-527.  Permit specification of the local address that various
+    Hadoop daemons should bind to.  (Philippe Gassmann via cutting)
+
+ 7. HADOOP-542.  Updates to contrib/streaming: reformatted source code,
+    on-the-fly merge sort, a fix for HADOOP-540, etc.
+    (Michel Tourn via cutting)
+
+ 8. HADOOP-545.  Remove an unused config file parameter.
+    (Philippe Gassmann via cutting)
+
+ 9. HADOOP-548.  Add an Ant property "test.output" to build.xml that
+    causes test output to be logged to the console.  (omalley via cutting)
+
+10. HADOOP-261.  Record an error message when map output is lost.
+    (omalley via cutting)
+
+11. HADOOP-293.  Report the full list of task error messages in the
+    web ui, not just the most recent.  (omalley via cutting)
+
+12. HADOOP-551.  Restore JobClient's console printouts to only include
+    a maximum of one update per one percent of progress.
+    (omalley via cutting)
+
+13. HADOOP-306.  Add a "safe" mode to DFS.  The name node enters this
+    when less than a specified percentage of file data is complete.
+    Currently safe mode is only used on startup, but eventually it
+    will also be entered when datanodes disconnect and file data
+    becomes incomplete.  While in safe mode no filesystem
+    modifications are permitted and block replication is inhibited.
+    (Konstantin Shvachko via cutting)
+
+14. HADOOP-431.  Change 'dfs -rm' to not operate recursively and add a
+    new command, 'dfs -rmr' which operates recursively.
+    (Sameer Paranjpye via cutting)
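+
+    For example (illustrative paths), removing a single file versus an
+    entire directory tree now looks like:
+
+      bin/hadoop dfs -rm /user/alice/part-00000
+      bin/hadoop dfs -rmr /user/alice/old-output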
+
+15. HADOOP-263.  Include timestamps for job transitions.  The web
+    interface now displays the start and end times of tasks and the
+    start times of sorting and reducing for reduce tasks.  Also,
+    extend ObjectWritable to handle enums, so that they can be passed
+    as RPC parameters.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-556.  Contrib/streaming: send keep-alive reports to task
+    tracker every 10 seconds rather than every 100 records, to avoid
+    task timeouts.  (Michel Tourn via cutting)
+
+17. HADOOP-547.  Fix reduce tasks to ping tasktracker while copying
+    data, rather than only between copies, avoiding task timeouts.
+    (Sanjay Dahiya via cutting)
+
+18. HADOOP-537.  Fix src/c++/libhdfs build process to create files in
+    build/, no longer modifying the source tree.
+    (Arun C Murthy via cutting)
+
+19. HADOOP-487.  Throw a more informative exception for unknown RPC
+    hosts.  (Sameer Paranjpye via cutting)
+
+20. HADOOP-559.  Add file name globbing (pattern matching) support to
+    the FileSystem API, and use it in DFSShell ('bin/hadoop dfs')
+    commands.  (Hairong Kuang via cutting)
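+
+    For example (illustrative pattern; quote it so the local shell does
+    not expand it first), one command can now select many files:
+
+      bin/hadoop dfs -ls '/logs/2006-10-*'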
+
+21. HADOOP-508.  Fix a bug in FSDataInputStream.  Incorrect data was
+    returned after seeking to a random location.
+    (Milind Bhandarkar via cutting)
+
+22. HADOOP-560.  Add a "killed" task state.  This can be used to
+    distinguish kills from other failures.  Task state has also been
+    converted to use an enum type instead of an int, uncovering a bug
+    elsewhere.  The web interface is also updated to display killed
+    tasks.  (omalley via cutting)
+
+23. HADOOP-423.  Normalize Paths containing directories named "." and
+    "..", using the standard, unix interpretation.  Also add checks in
+    DFS, prohibiting the use of "." or ".." as directory or file
+    names.  (Wendy Chien via cutting)
+
+24. HADOOP-513.  Replace map output handling with a servlet, rather
+    than a JSP page.  This fixes an issue where
+    IllegalStateExceptions were logged, sets content-length
+    correctly, and better handles some errors.  (omalley via cutting)
+
+25. HADOOP-552.  Improved error checking when copying map output files
+    to reduce nodes.  (omalley via cutting)
+
+26. HADOOP-566.  Fix scripts to work correctly when accessed through
+    relative symbolic links.  (Lee Faris via cutting)
+
+27. HADOOP-519.  Add positioned read methods to FSInputStream.  These
+    permit one to read from a stream without moving its position, and
+    can hence be performed by multiple threads at once on a single
+    stream. Implement an optimized version for DFS and local FS.
+    (Milind Bhandarkar via cutting)
+
+28. HADOOP-522. Permit block compression with MapFile and SetFile.
+    Since these formats are always sorted, block compression can
+    provide a big advantage.  (cutting)
+
+29. HADOOP-567. Record version and revision information in builds.  A
+    package manifest is added to the generated jar file containing
+    version information, and a VersionInfo utility is added that
+    includes further information, including the build date and user,
+    and the subversion revision and repository.  A 'bin/hadoop
+    version' command is added to show this information, and it is also
+    added to various web interfaces.  (omalley via cutting)
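+
+    For example:
+
+      bin/hadoop version
+
+    prints the release version together with the recorded build and
+    subversion details described above.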
+
+30. HADOOP-568.  Fix so that errors while initializing tasks on a
+    tasktracker correctly report the task as failed to the jobtracker,
+    so that it will be rescheduled.  (omalley via cutting)
+
+31. HADOOP-550.  Disable automatic UTF-8 validation in Text.  This
+    permits, e.g., TextInputFormat to again operate on non-UTF-8 data.
+    (Hairong and Mahadev via cutting)
+
+32. HADOOP-343.  Fix mapred copying so that a failed tasktracker
+    doesn't cause other copies to slow.  (Sameer Paranjpye via cutting)
+
+33. HADOOP-239.  Add a persistent job history mechanism, so that basic
+    job statistics are not lost after 24 hours and/or when the
+    jobtracker is restarted.  (Sanjay Dahiya via cutting)
+
+34. HADOOP-506.  Ignore heartbeats from stale task trackers.
+   (Sanjay Dahiya via cutting)
+
+35. HADOOP-255.  Discard stale, queued IPC calls.  Do not process
+    calls whose clients will likely time out before they receive a
+    response.  When the queue is full, new calls are now received and
+    queued, and the oldest calls are discarded, so that, when servers
+    get bogged down, they no longer develop a backlog on the socket.
+    This should improve some DFS namenode failure modes.
+    (omalley via cutting)
+
+36. HADOOP-581.  Fix datanode to not reset itself on communications
+    errors with the namenode.  If a request to the namenode fails, the
+    datanode should retry, not restart.  This reduces the load on the
+    namenode, since restarts cause a resend of the block report.
+    (omalley via cutting)
+
+
+Release 0.6.2 - 2006-09-18
+
+1. HADOOP-532.  Fix a bug reading value-compressed sequence files,
+   where an exception was thrown reporting that the full value had not
+   been read.  (omalley via cutting)
+
+2. HADOOP-534.  Change the default value class in JobConf to be Text
+   instead of the now-deprecated UTF8.  This fixes the Grep example
+   program, which was updated to use Text, but relies on this
+   default.  (Hairong Kuang via cutting)
+
+
+Release 0.6.1 - 2006-09-13
+
+ 1. HADOOP-520.  Fix a bug in libhdfs, where write failures were not
+    correctly returning error codes.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-523.  Fix a NullPointerException when TextInputFormat is
+    explicitly specified.  Also add a test case for this.
+    (omalley via cutting)
+
+ 3. HADOOP-521.  Fix another NullPointerException finding the
+    ClassLoader when using libhdfs.  (omalley via cutting)
+
+ 4. HADOOP-526.  Fix a NullPointerException when attempting to start
+    two datanodes in the same directory.  (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-529.  Fix a NullPointerException when opening
+    value-compressed sequence files generated by pre-0.6.0 Hadoop.
+    (omalley via cutting)
+
+
+Release 0.6.0 - 2006-09-08
+
+ 1. HADOOP-427.  Replace some uses of DatanodeDescriptor in the DFS
+    web UI code with DatanodeInfo, the preferred public class.
+    (Devaraj Das via cutting)
+
+ 2. HADOOP-426.  Fix streaming contrib module to work correctly on
+    Solaris.  This was causing nightly builds to fail.
+    (Michel Tourn via cutting)
+
+ 3. HADOOP-400.  Improvements to task assignment.  Tasks are no longer
+    re-run on nodes where they have failed (unless no other node is
+    available).  Also, tasks are better load-balanced among nodes.
+    (omalley via cutting)
+
+ 4. HADOOP-324.  Fix datanode to not exit when a disk is full, but
+    rather simply to fail writes.  (Wendy Chien via cutting)
+
+ 5. HADOOP-434.  Change smallJobsBenchmark to use standard Hadoop
+    scripts.  (Sanjay Dahiya via cutting)
+
+ 6. HADOOP-453.  Fix a bug in Text.setCapacity().  (siren via cutting)
+
+
+ 7. HADOOP-450.  Change so that input types are determined by the
+    RecordReader rather than specified directly in the JobConf.  This
+    facilitates jobs with a variety of input types.
+
+    WARNING: This contains incompatible API changes!  The RecordReader
+    interface has two new methods that all user-defined InputFormats
+    must now define.  Also, the values returned by TextInputFormat are
+    no longer of class UTF8, but now of class Text.
+
+ 8. HADOOP-436.  Fix an error-handling bug in the web ui.
+    (Devaraj Das via cutting)
+
+ 9. HADOOP-455.  Fix a bug in Text, where DEL was not permitted.
+    (Hairong Kuang via cutting)
+
+10. HADOOP-456.  Change the DFS namenode to keep a persistent record
+    of the set of known datanodes.  This will be used to implement a
+    "safe mode" where filesystem changes are prohibited when a
+    critical percentage of the datanodes are unavailable.
+    (Konstantin Shvachko via cutting)
+
+11. HADOOP-322.  Add a job control utility.  This permits one to
+    specify job interdependencies.  Each job is submitted only after
+    the jobs it depends on have successfully completed.
+    (Runping Qi via cutting)
+
+12. HADOOP-176.  Fix a bug in IntWritable.Comparator.
+    (Dick King via cutting)
+
+13. HADOOP-421.  Replace uses of String in recordio package with Text
+    class, for improved handling of UTF-8 data.
+    (Milind Bhandarkar via cutting)
+
+14. HADOOP-464.  Improved error message when job jar not found.
+    (Michel Tourn via cutting)
+
+15. HADOOP-469.  Fix /bin/bash specifics that have crept into our
+    /bin/sh scripts since HADOOP-352.
+    (Jean-Baptiste Quenot via cutting)
+
+16. HADOOP-468.  Add HADOOP_NICENESS environment variable to set
+    scheduling priority for daemons.  (Vetle Roeim via cutting)
+
+17. HADOOP-473.  Fix TextInputFormat to correctly handle more EOL
+    formats.  Things now work correctly with CR, LF or CRLF.
+    (Dennis Kubes & James White via cutting)
+
+18. HADOOP-461.  Make Java 1.5 an explicit requirement.  (cutting)
+
+19. HADOOP-54.  Add block compression to SequenceFile.  One may now
+    specify that blocks of keys and values are compressed together,
+    improving compression for small keys and values.
+    SequenceFile.Writer's constructor is now deprecated and replaced
+    with a factory method.  (Arun C Murthy via cutting)
+
+20. HADOOP-281.  Prohibit DFS files that are also directories.
+    (Wendy Chien via cutting)
+
+21. HADOOP-486.  Add the job username to JobStatus instances returned
+    by JobClient.  (Mahadev Konar via cutting)
+
+22. HADOOP-437.  contrib/streaming: Add support for gzipped inputs.
+    (Michel Tourn via cutting)
+
+23. HADOOP-463.  Add variable expansion to config files.
+    Configuration property values may now contain variable
+    expressions.  A variable is referenced with the syntax
+    '${variable}'.  Variable values are found first in the
+    configuration, and then in Java system properties.  The default
+    configuration is modified so that temporary directories are now
+    under ${hadoop.tmp.dir}, which is, by default,
+    /tmp/hadoop-${user.name}.  (Michel Tourn via cutting)
+
+24. HADOOP-419. Fix a NullPointerException finding the ClassLoader
+    when using libhdfs.  (omalley via cutting)
+
+25. HADOOP-460. Fix contrib/smallJobsBenchmark to use Text instead of
+    UTF8.  (Sanjay Dahiya via cutting)
+
+26. HADOOP-196.  Fix Configuration(Configuration) constructor to work
+    correctly.  (Sami Siren via cutting)
+
+27. HADOOP-501.  Fix Configuration.toString() to handle URL resources.
+    (Thomas Friol via cutting)
+
+28. HADOOP-499.  Reduce the use of Strings in contrib/streaming,
+    replacing them with Text for better performance.
+    (Hairong Kuang via cutting)
+
+29. HADOOP-64.  Manage multiple volumes with a single DataNode.
+    Previously DataNode would create a separate daemon per configured
+    volume, each with its own connection to the NameNode.  Now all
+    volumes are handled by a single DataNode daemon, reducing the load
+    on the NameNode.  (Milind Bhandarkar via cutting)
+
+30. HADOOP-424.  Fix MapReduce so that jobs which generate zero splits
+    do not fail.  (Frédéric Bertin via cutting)
+
+31. HADOOP-408.  Adjust some timeouts and remove some others so that
+    unit tests run faster.  (cutting)
+
+32. HADOOP-507.  Fix an IllegalAccessException in DFS.
+    (omalley via cutting)
+
+33. HADOOP-320.  Fix so that checksum files are correctly copied when
+    the destination of a file copy is a directory.
+    (Hairong Kuang via cutting)
+
+34. HADOOP-286.  In DFSClient, avoid pinging the NameNode with
+    renewLease() calls when no files are being written.
+    (Konstantin Shvachko via cutting)
+
+35. HADOOP-312.  Close idle IPC connections.  All IPC connections were
+    cached forever.  Now, after a connection has been idle for more
+    than a configurable amount of time (one second by default), the
+    connection is closed, conserving resources on both client and
+    server. (Devaraj Das via cutting)
+
+36. HADOOP-497.  Permit the specification of the network interface and
+    nameserver to be used when determining the local hostname
+    advertised by datanodes and tasktrackers.
+    (Lorenzo Thione via cutting)
+
+37. HADOOP-441.  Add a compression codec API and extend SequenceFile
+    to use it.  This will permit the use of alternate compression
+    codecs in SequenceFile.  (Arun C Murthy via cutting)
+
+38. HADOOP-483. Improvements to libhdfs build and documentation.
+    (Arun C Murthy via cutting)
+
+39. HADOOP-458.  Fix a memory corruption bug in libhdfs.
+    (Arun C Murthy via cutting)
+
+40. HADOOP-517.  Fix a contrib/streaming bug in end-of-line detection.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-474.  Add CompressionCodecFactory, and use it in
+    TextInputFormat and TextOutputFormat.  Compressed input files are
+    automatically decompressed when they have the correct extension.
+    Output files will, when output compression is specified, be
+    generated with an appropriate extension.  Also add a gzip codec and
+    fix problems with UTF8 text inputs.  (omalley via cutting)
+
+
+Release 0.5.0 - 2006-08-04
+
+ 1. HADOOP-352.  Fix shell scripts to use /bin/sh instead of
+    /bin/bash, for better portability.
+    (Jean-Baptiste Quenot via cutting)
+
+ 2. HADOOP-313.  Permit task state to be saved so that single tasks
+    may be manually re-executed when debugging.  (omalley via cutting)
+
+ 3. HADOOP-339.  Add method to JobClient API listing jobs that are
+    not yet complete, i.e., that are queued or running.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-355.  Updates to the streaming contrib module, including
+    API fixes, making reduce optional, and adding an input type for
+    StreamSequenceRecordReader.  (Michel Tourn via cutting)
+
+ 5. HADOOP-358.  Fix a NPE bug in Path.equals().
+    (Frédéric Bertin via cutting)
+
+ 6. HADOOP-327.  Fix ToolBase to not call System.exit() when
+    exceptions are thrown.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-359.  Permit map output to be compressed.
+    (omalley via cutting)
+
+ 8. HADOOP-341.  Permit input URI to CopyFiles to use the HTTP
+    protocol.  This lets one, e.g., more easily copy log files into
+    DFS.  (Arun C Murthy via cutting)
+
+ 9. HADOOP-361.  Remove unix dependencies from streaming contrib
+    module tests, making them pure java. (Michel Tourn via cutting)
+
+10. HADOOP-354.  Make public methods to stop DFS daemons.
+    (Barry Kaplan via cutting)
+
+11. HADOOP-252.  Add versioning to RPC protocols.
+    (Milind Bhandarkar via cutting)
+
+12. HADOOP-356.  Add contrib to "compile" and "test" build targets, so
+    that this code is better maintained. (Michel Tourn via cutting)
+
+13. HADOOP-307.  Add smallJobsBenchmark contrib module.  This runs
+    lots of small jobs, in order to determine per-task overheads.
+    (Sanjay Dahiya via cutting)
+
+14. HADOOP-342.  Add a tool for log analysis: Logalyzer.
+    (Arun C Murthy via cutting)
+
+15. HADOOP-347.  Add web-based browsing of DFS content.  The namenode
+    redirects browsing requests to datanodes.  Content requests are
+    redirected to datanodes where the data is local when possible.
+    (Devaraj Das via cutting)
+
+16. HADOOP-351.  Make Hadoop IPC kernel independent of Jetty.
+    (Devaraj Das via cutting)
+
+17. HADOOP-237.  Add metric reporting to DFS and MapReduce.  With only
+    minor configuration changes, one can now monitor many Hadoop
+    system statistics using Ganglia or other monitoring systems.
+    (Milind Bhandarkar via cutting)
+
+18. HADOOP-376.  Fix datanode's HTTP server to scan for a free port.
+    (omalley via cutting)
+
+19. HADOOP-260.  Add --config option to shell scripts, specifying an
+    alternate configuration directory. (Milind Bhandarkar via cutting)
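+
+    For example (the directory name is illustrative), any command can
+    be pointed at an alternate configuration directory:
+
+      bin/hadoop --config /etc/hadoop-staging dfs -ls /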
+
+20. HADOOP-381.  Permit developers to save the temporary files for
+    tasks whose names match a regular expression, to facilitate
+    debugging.  (omalley via cutting)
+
+21. HADOOP-344.  Fix some Windows-related problems with DF.
+    (Konstantin Shvachko via cutting)
+
+22. HADOOP-380.  Fix reduce tasks to poll less frequently for map
+    outputs. (Mahadev Konar via cutting)
+
+23. HADOOP-321.  Refactor DatanodeInfo, in preparation for
+    HADOOP-306.  (Konstantin Shvachko & omalley via cutting)
+
+24. HADOOP-385.  Fix some bugs in record io code generation.
+    (Milind Bhandarkar via cutting)
+
+25. HADOOP-302.  Add new Text class to replace UTF8, removing
+    limitations of that class.  Also refactor utility methods for
+    writing zero-compressed integers (VInts and VLongs).
+    (Hairong Kuang via cutting)
+
+26. HADOOP-335.  Refactor DFS namespace/transaction logging in
+    namenode.   (Konstantin Shvachko via cutting)
+
+27. HADOOP-375.  Fix handling of the datanode HTTP daemon's port so
+    that multiple datanodes can be run on a single host.
+    (Devaraj Das via cutting)
+
+28. HADOOP-386.  When removing excess DFS block replicas, remove those
+    on nodes with the least free space first.
+    (Johan Oskarson via cutting)
+
+29. HADOOP-389.  Fix intermittent failures of mapreduce unit tests.
+    Also fix some build dependencies.
+    (Mahadev & Konstantin via cutting)
+
+30. HADOOP-362.  Fix a problem where jobs hang when status messages
+    are received out-of-order.  (omalley via cutting)
+
+31. HADOOP-394.  Change order of DFS shutdown in unit tests to
+    minimize errors logged.  (Konstantin Shvachko via cutting)
+
+32. HADOOP-396.  Make DatanodeID implement Writable.
+    (Konstantin Shvachko via cutting)
+
+33. HADOOP-377.  Permit one to add URL resources to a Configuration.
+    (Jean-Baptiste Quenot via cutting)
+
+34. HADOOP-345.  Permit iteration over Configuration key/value pairs.
+    (Michel Tourn via cutting)
+
+35. HADOOP-409.  Streaming contrib module: make configuration
+    properties available to commands as environment variables.
+    (Michel Tourn via cutting)
+
+36. HADOOP-369.  Add -getmerge option to dfs command that appends all
+    files in a directory into a single local file.
+    (Johan Oskarson via cutting)
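+
+    For example (illustrative paths), the files under an output
+    directory can be concatenated into a single local file:
+
+      bin/hadoop dfs -getmerge /user/alice/output merged.txt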
+
+37. HADOOP-410.  Replace some TreeMaps with HashMaps in DFS, for
+    a 17% performance improvement. (Milind Bhandarkar via cutting)
+
+38. HADOOP-411.  Add unit tests for command line parser.
+    (Hairong Kuang via cutting)
+
+39. HADOOP-412.  Add MapReduce input formats that support filtering
+    of SequenceFile data, including sampling and regex matching.
+    Also, move JobConf.newInstance() to a new utility class.
+    (Hairong Kuang via cutting)
+
+40. HADOOP-226.  Fix fsck command to properly consider replication
+    counts, now that these can vary per file.  (Bryan Pendleton via cutting)
+
+41. HADOOP-425.  Add a Python MapReduce example, using Jython.
+    (omalley via cutting)
+
+
+Release 0.4.0 - 2006-06-28
+
+ 1. HADOOP-298.  Improved progress reports for CopyFiles utility, the
+    distributed file copier.  (omalley via cutting)
+
+ 2. HADOOP-299.  Fix the task tracker, permitting multiple jobs to
+    more easily execute at the same time.  (omalley via cutting)
+
+ 3. HADOOP-250.  Add an HTTP user interface to the namenode, running
+    on port 50070. (Devaraj Das via cutting)
+
+ 4. HADOOP-123.  Add MapReduce unit tests that run a jobtracker and
+    tasktracker, greatly increasing code coverage.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-271.  Add links from jobtracker's web ui to tasktracker's
+    web ui.  Also attempt to log a thread dump of child processes
+    before they're killed.  (omalley via cutting)
+
+ 6. HADOOP-210.  Change RPC server to use a selector instead of a
+    thread per connection.  This should make it easier to scale to
+    larger clusters.  Note that this incompatibly changes the RPC
+    protocol: clients and servers must both be upgraded to the new
+    version to ensure correct operation.  (Devaraj Das via cutting)
+
+ 7. HADOOP-311.  Change DFS client to retry failed reads, so that a
+    single read failure will not alone cause failure of a task.
+    (omalley via cutting)
+
+ 8. HADOOP-314.  Remove the "append" phase when reducing.  Map output
+    files are now directly passed to the sorter, without first
+    appending them into a single file.  Now, the first third of reduce
+    progress is "copy" (transferring map output to reduce nodes), the
+    middle third is "sort" (sorting map output) and the last third is
+    "reduce" (generating output).  Long-term, the "sort" phase will
+    also be removed.  (omalley via cutting)
+
+ 9. HADOOP-316.  Fix a potential deadlock in the jobtracker.
+    (omalley via cutting)
+
+10. HADOOP-319.  Fix FileSystem.close() to remove the FileSystem
+    instance from the cache.  (Hairong Kuang via cutting)
+
+11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
+    locks in a consistent order.  (omalley via cutting)
+
+12. HADOOP-278.  Check for existence of input directories before
+    starting MapReduce jobs, making it easier to debug this common
+    error.  (omalley via cutting)
+
+13. HADOOP-304.  Improve error message for
+    UnregisterdDatanodeException to include expected node name.
+   (Konstantin Shvachko via cutting)
+
+14. HADOOP-305.  Fix TaskTracker to ask for new tasks as soon as a
+    task is finished, rather than waiting for the next heartbeat.
+    This improves performance when tasks are short.
+    (Mahadev Konar via cutting)
+
+15. HADOOP-59.  Add support for generic command line options.  One may
+    now specify the filesystem (-fs), the MapReduce jobtracker (-jt),
+    a config file (-conf) or any configuration property (-D).  The
+    "dfs", "fsck", "job", and "distcp" commands currently support
+    this, with more to be added.  (Hairong Kuang via cutting)
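+
+    A sketch of the syntax (host names, ports, and property values are
+    illustrative; exact forms may differ between releases):
+
+      bin/hadoop dfs -fs namenode.example.com:8020 -ls /
+      bin/hadoop dfs -D dfs.replication=2 -ls /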
+
+16. HADOOP-296.  Permit specification of the amount of reserved space
+    on a DFS datanode.  One may specify both the percentage free and
+    the number of bytes.  (Johan Oskarson via cutting)
+
+17. HADOOP-325.  Fix a problem initializing RPC parameter classes, and
+    remove the workaround used to initialize classes.
+    (omalley via cutting)
+
+18. HADOOP-328.  Add an option to the "distcp" command to ignore read
+    errors while copying.  (omalley via cutting)
+
+19. HADOOP-27.  Don't allocate tasks to trackers whose local free
+    space is too low.  (Johan Oskarson via cutting)
+
+20. HADOOP-318.  Keep slow DFS output from causing task timeouts.
+    This incompatibly changes some public interfaces, adding a
+    parameter to OutputFormat.getRecordWriter() and the new method
+    Reporter.progress(), but it makes lots of tasks succeed that were
+    previously failing.  (Milind Bhandarkar via cutting)
+
+
+Release 0.3.2 - 2006-06-09
+
+ 1. HADOOP-275.  Update the streaming contrib module to use log4j for
+    its logging.  (Michel Tourn via cutting)
+
+ 2. HADOOP-279.  Provide defaults for log4j logging parameters, so
+    that things still work reasonably when Hadoop-specific system
+    properties are not provided.  (omalley via cutting)
+
+ 3. HADOOP-280.  Fix a typo in AllTestDriver which caused the wrong
+    test to be run when "DistributedFSCheck" was specified.
+   (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-240.  DFS's mkdirs() implementation no longer logs a warning
+    when the directory already exists. (Hairong Kuang via cutting)
+
+ 5. HADOOP-285.  Fix DFS datanodes to be able to re-join the cluster
+    after the connection to the namenode is lost.  (omalley via cutting)
+
+ 6. HADOOP-277.  Fix a race condition when creating directories.
+   (Sameer Paranjpye via cutting)
+
+ 7. HADOOP-289.  Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
+ 8. HADOOP-292.  Fix client-side logging to go to standard error
+    rather than standard output, so that it can be distinguished from
+    application output.  (omalley via cutting)
+
+ 9. HADOOP-294.  Fixed bug where conditions for retrying after errors
+    in the DFS client were reversed.  (omalley via cutting)
+
+
+Release 0.3.1 - 2006-06-05
+
+ 1. HADOOP-272.  Fix a bug in bin/hadoop setting log
+    parameters. (omalley & cutting)
+
+ 2. HADOOP-274.  Change applications to log to standard output rather
+    than to a rolling log file like daemons.  (omalley via cutting)
+
+ 3. HADOOP-262.  Fix reduce tasks to report progress while they're
+    waiting for map outputs, so that they do not time out.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-245 and HADOOP-246.  Improvements to record io package.  
+    (Mahadev Konar via cutting)
+
+ 5. HADOOP-276.  Add logging config files to jar file so that they're
+    always found.  (omalley via cutting)
+
+
+Release 0.3.0 - 2006-06-02
+
+ 1. HADOOP-208.  Enhance MapReduce web interface, adding new pages
+    for failed tasks, and tasktrackers.  (omalley via cutting)
+
+ 2. HADOOP-204.  Tweaks to metrics package.  (David Bowen via cutting)
+
+ 3. HADOOP-209.  Add a MapReduce-based file copier.  This will
+    copy files within or between file systems in parallel.
+    (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-146.  Fix DFS to check when randomly generating a new block
+    id that no existing blocks already have that id.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-180. Make a daemon thread that does the actual task clean-ups, so
+    that the main offerService thread in the taskTracker doesn't get stuck
+    and miss its heartbeat window. This was killing many task trackers as
+    big jobs finished (300+ tasks / node). (omalley via cutting)
+
+ 6. HADOOP-200. Avoid transmitting entire list of map task names to
+    reduce tasks.  Instead just transmit the number of map tasks and
+    henceforth refer to them by number when collecting map output.
+    (omalley via cutting)
+
+ 7. HADOOP-219. Fix a NullPointerException when handling a checksum
+    exception under SequenceFile.Sorter.sort().  (cutting & stack)
+
+ 8. HADOOP-212. Permit alteration of the file block size in DFS.  The
+    default block size for new files may now be specified in the
+    configuration with the dfs.block.size property.  The block size
+    may also be specified when files are opened.
+    (omalley via cutting)
+
+ 9. HADOOP-218. Avoid accessing configuration while looping through
+    tasks in JobTracker.  (Mahadev Konar via cutting)
+
+10. HADOOP-161. Add hashCode() method to DFS's Block.
+    (Milind Bhandarkar via cutting)
+
+11. HADOOP-115. Map output types may now be specified.  These are also
+    used as reduce input types, thus permitting reduce input types to
+    differ from reduce output types.  (Runping Qi via cutting)
+
+12. HADOOP-216. Add task progress to task status page.
+    (Bryan Pendleton via cutting)
+
+13. HADOOP-233.  Add web server to task tracker that shows running
+    tasks and logs.  Also add log access to job tracker web interface.
+    (omalley via cutting)
+
+14. HADOOP-205.  Incorporate pending tasks into tasktracker load
+    calculations.  (Mahadev Konar via cutting)
+
+15. HADOOP-247.  Fix sort progress to better handle exceptions.
+    (Mahadev Konar via cutting)
+
+16. HADOOP-195.  Improve performance of the transfer of map outputs to
+    reduce nodes by performing multiple transfers in parallel, each on
+    a separate socket.  (Sameer Paranjpye via cutting)
+
+17. HADOOP-251.  Fix task processes to be tolerant of failed progress
+    reports to their parent process.  (omalley via cutting)
+
+18. HADOOP-325.  Improve the FileNotFound exceptions thrown by
+    LocalFileSystem to include the name of the file.
+    (Benjamin Reed via cutting)
+
+19. HADOOP-254.  Use HTTP to transfer map output data to reduce
+    nodes.  This, together with HADOOP-195, greatly improves the
+    performance of these transfers.  (omalley via cutting)
+
+20. HADOOP-163.  Cause datanodes that are unable to either read or
+    write data to exit, so that the namenode will no longer target
+    them for new blocks and will replicate their data on other nodes.
+    (Hairong Kuang via cutting)
+
+21. HADOOP-222.  Add a -setrep option to the dfs commands that alters
+    file replication levels.  (Johan Oskarson via cutting)
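+
+    For example (path and replication count are illustrative, and the
+    argument order shown follows later releases):
+
+      bin/hadoop dfs -setrep 5 /user/alice/important.dat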
+
+22. HADOOP-75.  In DFS, only check for a complete file when the file
+    is closed, rather than as each block is written.
+    (Milind Bhandarkar via cutting)
+
+23. HADOOP-124. Change DFS so that datanodes are identified by a
+    persistent ID rather than by host and port.  This solves a number
+    of filesystem integrity problems, when, e.g., datanodes are
+    restarted.  (Konstantin Shvachko via cutting)
+
+24. HADOOP-256.  Add a C API for DFS.  (Arun C Murthy via cutting)
+
+25. HADOOP-211.  Switch to use the Jakarta Commons logging internally,
+    configured to use log4j by default.  (Arun C Murthy and cutting)
+
+26. HADOOP-265.  Tasktracker now fails to start if it does not have a
+    writable local directory for temporary files.  In this case, it
+    logs a message to the JobTracker and exits. (Hairong Kuang via cutting)
+
+27. HADOOP-270.  Fix potential deadlock in datanode shutdown.
+    (Hairong Kuang via cutting)
+
+Release 0.2.1 - 2006-05-12
+
+ 1. HADOOP-199.  Fix reduce progress (broken by HADOOP-182).
+    (omalley via cutting)
+
+ 2. HADOOP-201.  Fix 'bin/hadoop dfs -report'.  (cutting)
+
+ 3. HADOOP-207.  Fix JDK 1.4 incompatibility introduced by HADOOP-96.
+    System.getenv() does not work in JDK 1.4.  (Hairong Kuang via cutting)
+
+
+Release 0.2.0 - 2006-05-05
+
+ 1. Fix HADOOP-126. 'bin/hadoop dfs -cp' now correctly copies .crc
+    files.  (Konstantin Shvachko via cutting)
+
+ 2. Fix HADOOP-51. Change DFS to support per-file replication counts.
+    (Konstantin Shvachko via cutting)
+
+ 3. Fix HADOOP-131.  Add scripts to start/stop dfs and mapred daemons.
+    Use these in start/stop-all scripts.  (Chris Mattmann via cutting)
+
+ 4. Stop using ssh options by default that are not yet in widely used
+    versions of ssh.  Folks can still enable their use by uncommenting
+    a line in conf/hadoop-env.sh. (cutting)
+
+ 5. Fix HADOOP-92.  Show information about all attempts to run each
+    task in the web ui.  (Mahadev Konar via cutting)
+
+ 6. Fix HADOOP-128.  Improved DFS error handling. (Owen O'Malley via cutting)
+
+ 7. Fix HADOOP-129.  Replace uses of java.io.File with new class named
+    Path.  This fixes bugs where java.io.File methods were called
+    directly when FileSystem methods were desired, and reduces the
+    likelihood of such bugs in the future.  It also makes the handling
+    of pathnames more consistent between local and dfs FileSystems and
+    between Windows and Unix. java.io.File-based methods are still
+    available for back-compatibility, but are deprecated and will be
+    removed once 0.2 is released. (cutting)
+
+ 8. Change dfs.data.dir and mapred.local.dir to be comma-separated
+    lists of directories, no longer be space-separated. This fixes
+    several bugs on Windows. (cutting)
+
+ 9. Fix HADOOP-144.  Use mapred task id for dfs client id, to
+    facilitate debugging.  (omalley via cutting)
+
+10. Fix HADOOP-143.  Do not line-wrap stack-traces in web ui.
+    (omalley via cutting)
+
+11. Fix HADOOP-118.  In DFS, improve clean up of abandoned file
+    creations.  (omalley via cutting)
+
+12. Fix HADOOP-138.  Stop multiple tasks in a single heartbeat, rather
+    than one per heartbeat.  (Stefan via cutting)
+
+13. Fix HADOOP-139.  Remove a potential deadlock in
+    LocalFileSystem.lock().  (Igor Bolotin via cutting)
+
+14. Fix HADOOP-134.  Don't hang jobs when the tasktracker is
+    misconfigured to use an un-writable local directory.  (omalley via cutting)
+
+15. Fix HADOOP-115.  Correct an error message.  (Stack via cutting)
+
+16. Fix HADOOP-133.  Retry pings from child to parent, in case of
+    (local) communication problems.  Also log exit status, so that one
+    can distinguish patricide from other deaths.  (omalley via cutting)
+
+17. Fix HADOOP-142.  Avoid re-running a task on a host where it has
+    previously failed.  (omalley via cutting)
+
+18. Fix HADOOP-148.  Maintain a task failure count for each
+    tasktracker and display it in the web ui.  (omalley via cutting)
+
+19. Fix HADOOP-151.  Close a potential socket leak, where new IPC
+    connection pools were created per configuration instance that RPCs
+    use.  Now a global RPC connection pool is used again, as
+    originally intended.  (cutting)
+
+20. Fix HADOOP-69.  Don't throw a NullPointerException when getting
+    hints for non-existing file split.  (Bryan Pendleton via cutting)
+
+21. Fix HADOOP-157.  When a task that writes dfs files (e.g., a reduce
+    task) failed and was retried, it would fail again and again,
+    eventually failing the job.  The problem was that dfs did not yet
+    know that the failed task had abandoned the files, and would not
+    yet let another task create files with the same names.  Dfs now
+    retries when creating a file long enough for locks on abandoned
+    files to expire.  (omalley via cutting)
+
+22. Fix HADOOP-150.  Improved task names that include job
+    names. (omalley via cutting)
+
+23. Fix HADOOP-162.  Fix ConcurrentModificationException when
+    releasing file locks. (omalley via cutting)
+
+24. Fix HADOOP-132.  Initial check-in of new Metrics API, including 
+    implementations for writing metric data to a file and for sending
+    it to Ganglia.  (David Bowen via cutting)
+
+25. Fix HADOOP-160.  Remove some unneeded synchronization around
+    time-consuming operations in the TaskTracker.  (omalley via cutting)
+
+26. Fix HADOOP-166.  RPCs failed when passed subclasses of a declared
+    parameter type.  This is fixed by changing ObjectWritable to store
+    both the declared type and the instance type for Writables.  Note
+    that this incompatibly changes the format of ObjectWritable and
+    will render unreadable any ObjectWritables stored in files.
+    Nutch only uses ObjectWritable in intermediate files, so this
+    should not be a problem for Nutch.  (Stefan & cutting)
+
+27. Fix HADOOP-168.  MapReduce RPC protocol methods should all declare
+    IOException, so that timeouts are handled appropriately.
+    (omalley via cutting)
+
+28. Fix HADOOP-169.  Don't fail a reduce task if a call to the
+    jobtracker to locate map outputs fails.  (omalley via cutting)
+
+29. Fix HADOOP-170.  Permit FileSystem clients to examine and modify
+    the replication count of individual files.  Also fix a few
+    replication-related bugs. (Konstantin Shvachko via cutting)
+
+30. Permit specification of a higher replication levels for job
+    submission files (job.xml and job.jar).  This helps with large
+    clusters, since these files are read by every node.  (cutting)
+
+31. HADOOP-173.  Optimize allocation of tasks with local data.  (cutting)
+
+32. HADOOP-167.  Reduce number of Configurations and JobConf's
+    created.  (omalley via cutting)
+
+33. NUTCH-256.  Change FileSystem#createNewFile() to create a .crc
+    file.  The lack of a .crc file was causing warnings.  (cutting)
+
+34. HADOOP-174.  Change JobClient to not abort job until it has failed
+    to contact the job tracker for five attempts, not just one as
+    before.  (omalley via cutting)
+
+35. HADOOP-177.  Change MapReduce web interface to page through tasks.
+    Previously, when jobs had more than a few thousand tasks they
+    could crash web browsers.  (Mahadev Konar via cutting)
+
+36. HADOOP-178.  In DFS, piggyback blockwork requests from datanodes
+    on heartbeat responses from namenode.  This reduces the volume of
+    RPC traffic.  Also move startup delay in blockwork from datanode
+    to namenode.  This fixes a problem where restarting the namenode
+    triggered a lot of unneeded replication. (Hairong Kuang via cutting)
+
+37. HADOOP-183.  If the DFS namenode is restarted with different
+    minimum and/or maximum replication counts, existing files'
+    replication counts are now automatically adjusted to be within the
+    newly configured bounds. (Hairong Kuang via cutting)
+
+38. HADOOP-186.  Better error handling in TaskTracker's top-level
+    loop.  Also improve calculation of time to send next heartbeat.
+    (omalley via cutting)
+
+39. HADOOP-187.  Add two MapReduce examples/benchmarks.  One creates
+    files containing random data.  The second sorts the output of the
+    first.  (omalley via cutting)
+
+40. HADOOP-185.  Fix so that, when a task tracker times out making the
+    RPC asking for a new task to run, the job tracker does not think
+    that it is actually running the task returned.  (omalley via cutting)
+
+41. HADOOP-190.  If a child process hangs after it has reported
+    completion, its output should not be lost.  (Stack via cutting)
+
+42. HADOOP-184. Re-structure some test code to better support testing
+    on a cluster.  (Mahadev Konar via cutting)
+
+43. HADOOP-191.  Add streaming package, Hadoop's first contrib module.
+    This permits folks to easily submit MapReduce jobs whose map and
+    reduce functions are implemented by shell commands.  Use
+    'bin/hadoop jar build/hadoop-streaming.jar' to get details.
+    (Michel Tourn via cutting)
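+
+    A minimal invocation sketch (paths and commands are illustrative;
+    the jar's own usage message is authoritative for the options):
+
+      bin/hadoop jar build/hadoop-streaming.jar \
+        -input /user/alice/input -output /user/alice/output \
+        -mapper /bin/cat -reducer /usr/bin/wc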
+
+44. HADOOP-189.  Fix MapReduce in standalone configuration to
+    correctly handle job jar files that contain a lib directory with
+    nested jar files.  (cutting)
+
+45. HADOOP-65.  Initial version of record I/O framework that enables
+    the specification of record types and generates marshalling code
+    in both Java and C++.  Generated Java code implements
+    WritableComparable, but is not yet otherwise used by
+    Hadoop. (Milind Bhandarkar via cutting)
+
+46. HADOOP-193.  Add a MapReduce-based FileSystem benchmark.
+    (Konstantin Shvachko via cutting)
+
+47. HADOOP-194.  Add a MapReduce-based FileSystem checker.  This reads
+    every block in every file in the filesystem.  (Konstantin Shvachko
+    via cutting)
+
+48. HADOOP-182.  Fix so that lost task trackers do not change the
+    status of reduce tasks or completed jobs.  Also fixes the progress
+    meter so that failed tasks are subtracted. (omalley via cutting)
+
+49. HADOOP-96.  Logging improvements.  Log files are now separate from
+    standard output and standard error files.  Logs are now rolled.
+    Logging of all DFS state changes can be enabled, to facilitate
+    debugging.  (Hairong Kuang via cutting)
+
+
+Release 0.1.1 - 2006-04-08
+
+ 1. Added CHANGES.txt, logging all significant changes to Hadoop.  (cutting)
+
+ 2. Fix MapReduceBase.close() to throw IOException, as declared in the
+    Closeable interface.  This permits subclasses which override this
+    method to throw that exception. (cutting)
+
+ 3. Fix HADOOP-117.  Pathnames were mistakenly transposed in
+    JobConf.getLocalFile() causing many mapred temporary files to not
+    be removed.  (Raghavendra Prabhu via cutting)
+ 
+ 4. Fix HADOOP-116. Clean up job submission files when jobs complete.
+    (cutting)
+
+ 5. Fix HADOOP-125. Fix handling of absolute paths on Windows (cutting)
+
+Release 0.1.0 - 2006-04-01
+
+ 1. The first release of Hadoop.
+

+ 244 - 0
common/LICENSE.txt

@@ -0,0 +1,244 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+APACHE HADOOP SUBCOMPONENTS:
+
+The Apache Hadoop project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses. 
+
+For the org.apache.hadoop.util.bloom.* classes:
+
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract
+ * 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */

+ 2 - 0
common/NOTICE.txt

@@ -0,0 +1,2 @@
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).

+ 31 - 0
common/README.txt

@@ -0,0 +1,31 @@
+For the latest information about Hadoop, please visit our website at:
+
+   http://hadoop.apache.org/core/
+
+and our wiki, at:
+
+   http://wiki.apache.org/hadoop/
+
+This distribution includes cryptographic software.  The country in 
+which you currently reside may have restrictions on the import, 
+possession, use, and/or re-export to another country, of 
+encryption software.  BEFORE using any encryption software, please 
+check your country's laws, regulations and policies concerning the
+import, possession, or use, and re-export of encryption software, to 
+see if this is permitted.  See <http://www.wassenaar.org/> for more
+information.
+
+The U.S. Government Department of Commerce, Bureau of Industry and
+Security (BIS), has classified this software as Export Commodity 
+Control Number (ECCN) 5D002.C.1, which includes information security
+software using or performing cryptographic functions with asymmetric
+algorithms.  The form and manner of this Apache Software Foundation
+distribution makes it eligible for export under the License Exception
+ENC Technology Software Unrestricted (TSU) exception (see the BIS 
+Export Administration Regulations, Section 740.13) for both object 
+code and source code.
+
+The following provides more details on the included cryptographic
+software:
+  Hadoop Core uses the SSL libraries from the Jetty project written 
+by mortbay.org.

+ 123 - 0
common/bin/hadoop

@@ -0,0 +1,123 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs the hadoop core commands. 
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+ 
+. "$bin"/hadoop-config.sh
+
+function print_usage(){
+  echo "Usage: hadoop [--config confdir] COMMAND"
+  echo "       where COMMAND is one of:"
+  echo "  fs                   run a generic filesystem user client"
+  echo "  version              print the version"
+  echo "  jar <jar>            run a jar file"
+  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
+  echo "  classpath            prints the class path needed to get the"
+  echo "                       Hadoop jar and the required libraries"
+  echo "  daemonlog            get/set the log level for each daemon"
+  echo " or"
+  echo "  CLASSNAME            run the class named CLASSNAME"
+  echo ""
+  echo "Most commands print help when invoked w/o parameters."
+}
+
+if [ $# = 0 ]; then
+  print_usage
+  exit
+fi
+
+COMMAND=$1
+case $COMMAND in
+  #hdfs commands
+  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer)
+    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated."
+    echo "Instead use the hdfs command for it."
+    echo ""
+    #try to locate hdfs and if present, delegate to it.  
+    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
+      exec "${HADOOP_HDFS_HOME}"/bin/hdfs $*
+    elif [ -f "${HADOOP_HOME}"/bin/hdfs ]; then
+      exec "${HADOOP_HOME}"/bin/hdfs $*
+    else
+      echo "HDFS not found."
+      exit
+    fi
+    ;;
+
+  #mapred commands  
+  mradmin|jobtracker|tasktracker|pipes|job|queue)
+    echo "DEPRECATED: Use of this script to execute mapred command is deprecated."
+    echo "Instead use the mapred command for it."
+    echo ""
+    #try to locate mapred and if present, delegate to it.
+    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
+      exec "${HADOOP_MAPRED_HOME}"/bin/mapred $*
+    elif [ -f "${HADOOP_HOME}"/bin/mapred ]; then
+      exec "${HADOOP_HOME}"/bin/mapred $* 
+    else
+      echo "MAPRED not found."
+      exit
+    fi
+    ;;
+
+  classpath)
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    echo $CLASSPATH
+    exit
+    ;;
+
+  #core commands  
+  *)
+    # the core commands
+    if [ "$COMMAND" = "fs" ] ; then
+      CLASS=org.apache.hadoop.fs.FsShell
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "version" ] ; then
+      CLASS=org.apache.hadoop.util.VersionInfo
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "jar" ] ; then
+      CLASS=org.apache.hadoop.util.RunJar
+    elif [ "$COMMAND" = "distcp" ] ; then
+      CLASS=org.apache.hadoop.tools.DistCp
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "daemonlog" ] ; then
+      CLASS=org.apache.hadoop.log.LogLevel
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "archive" ] ; then
+      CLASS=org.apache.hadoop.tools.HadoopArchives
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    else
+      CLASS=$COMMAND
+    fi
+    shift
+    
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    export CLASSPATH=$CLASSPATH
+    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+    ;;
+
+esac
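
For orientation, a minimal sketch of how the dispatch above plays out (myapp.jar and com.example.MyTool are placeholder names, not part of this commit):

   # "fs" maps to org.apache.hadoop.fs.FsShell
   bin/hadoop fs -ls /
   # "jar" maps to org.apache.hadoop.util.RunJar and runs a user jar
   bin/hadoop jar myapp.jar com.example.MyTool input output
   # "classpath" prints the CLASSPATH assembled by hadoop-config.sh
   bin/hadoop classpath
   # any other word is treated as a class name and run directly
   bin/hadoop org.apache.hadoop.util.VersionInfo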

+ 331 - 0
common/bin/hadoop-config.sh

@@ -0,0 +1,331 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# included in all the hadoop scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+
+# resolve links - $0 may be a softlink
+
+this="${BASH_SOURCE-$0}"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+common_bin=`dirname "$this"`
+script=`basename "$this"`
+common_bin=`cd "$common_bin"; pwd`
+this="$common_bin/$script"
+
+# the root of the Hadoop installation
+#TODO: change the env variable when dir structure is changed
+export HADOOP_HOME=`dirname "$this"`/..
+export HADOOP_COMMON_HOME="${HADOOP_HOME}"
+#export HADOOP_HOME=`dirname "$this"`/../..
+#export HADOOP_COMMON_HOME="${HADOOP_COMMON_HOME:-`dirname "$this"`/..}"
+
+#check to see if the conf dir is given as an optional argument
+if [ $# -gt 1 ]
+then
+    if [ "--config" = "$1" ]
+	  then
+	      shift
+	      confdir=$1
+	      shift
+	      HADOOP_CONF_DIR=$confdir
+    fi
+fi
+ 
+# Allow alternate conf dir location.
+export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
+
+# User can specify hostnames or a file where the hostnames are (not both)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one variable HADOOP_SLAVES or " \
+    "HADOOP_SLAVE_NAME and not both."
+  exit 1
+fi
+
+# Process command line options that specify hosts or file with host
+# list
+if [ $# -gt 1 ]
+then
+    if [ "--hosts" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
+        shift
+    elif [ "--hostnames" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVE_NAMES=$1
+        shift
+    fi
+fi
+
+# User can specify hostnames or a file where the hostnames are (not both)
+# (same check as above but now we know it's command line options that cause
+# the problem)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one of --hosts or --hostnames options and not both."
+  exit 1
+fi
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HADOOP_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_COMMON_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/classes
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/classes
+fi
+if [ -d "$HADOOP_COMMON_HOME/build/test/core/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/build/test/core/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_COMMON_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME
+fi
+for f in $HADOOP_COMMON_HOME/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_COMMON_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Common/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop-Hdfs/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common" ]; then
+for f in $HADOOP_COMMON_HOME/build/ivy/lib/Hadoop/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+for f in $HADOOP_COMMON_HOME/lib/jsp-2.1/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add user-specified CLASSPATH last
+if [ "$HADOOP_CLASSPATH" != "" ]; then
+  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
+fi
+
+# default log directory & file
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  HADOOP_LOG_DIR="$HADOOP_HOME/logs"
+fi
+if [ "$HADOOP_LOGFILE" = "" ]; then
+  HADOOP_LOGFILE='hadoop.log'
+fi
+
+# default policy file for service-level authorization
+if [ "$HADOOP_POLICYFILE" = "" ]; then
+  HADOOP_POLICYFILE="hadoop-policy.xml"
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_COMMON_HOME=`cygpath -w "$HADOOP_COMMON_HOME"`
+  HADOOP_LOG_DIR=`cygpath -w "$HADOOP_LOG_DIR"`
+  JAVA_LIBRARY_PATH=`cygpath -w "$JAVA_LIBRARY_PATH"`
+fi
+
+# setup 'java.library.path' for native-hadoop code if necessary
+
+if [ -d "${HADOOP_COMMON_HOME}/build/native" -o -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
+  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
+  
+  if [ -d "$HADOOP_COMMON_HOME/build/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+        JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
+    else
+        JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/build/native/${JAVA_PLATFORM}/lib
+    fi
+  fi
+  
+  if [ -d "${HADOOP_COMMON_HOME}/lib/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
+    else
+      JAVA_LIBRARY_PATH=${HADOOP_COMMON_HOME}/lib/native/${JAVA_PLATFORM}
+    fi
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
+fi
+
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_COMMON_HOME"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
+
+# put hdfs in classpath if present
+if [ "$HADOOP_HDFS_HOME" = "" ]; then
+  if [ -d "${HADOOP_HOME}/hdfs" ]; then
+    HADOOP_HDFS_HOME=$HADOOP_HOME/hdfs
+    #echo Found HDFS installed at $HADOOP_HDFS_HOME
+  fi
+fi
+
+if [ -d "${HADOOP_HDFS_HOME}" ]; then
+
+  if [ -d "$HADOOP_HDFS_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME
+  fi
+  
+  if [ -d "${HADOOP_HDFS_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_HDFS_HOME}/conf
+  fi
+  
+  for f in $HADOOP_HDFS_HOME/hadoop-hdfs-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+
+  # add libs to CLASSPATH
+  for f in $HADOOP_HDFS_HOME/lib/*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+  
+  if [ -d "$HADOOP_HDFS_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/build/classes
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"`
+fi
+
+# set mapred home if mapred is present
+if [ "$HADOOP_MAPRED_HOME" = "" ]; then
+  if [ -d "${HADOOP_HOME}/mapred" ]; then
+    HADOOP_MAPRED_HOME=$HADOOP_HOME/mapred
+    #echo Found MAPRED installed at $HADOOP_MAPRED_HOME
+  fi
+fi
+
+if [ -d "${HADOOP_MAPRED_HOME}" ]; then
+
+  if [ -d "$HADOOP_MAPRED_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME
+  fi
+
+  if [ -d "${HADOOP_MAPRED_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_MAPRED_HOME}/conf
+  fi
+  
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapred-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f
+  done
+
+  for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
+    CLASSPATH=${CLASSPATH}:$f
+  done
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
+  fi
+
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapred-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+  for f in $HADOOP_MAPRED_HOME/build/hadoop-mapred-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_MAPRED_HOME=`cygpath -w "$HADOOP_MAPRED_HOME"`
+  TOOL_PATH=`cygpath -p -w "$TOOL_PATH"`
+fi
+
+
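
To make the option handling above concrete, a hedged sketch of a caller's command line as consumed when this file is sourced (the conf directory and host file are placeholders; --config must appear before --hosts or --hostnames):

   # host list is resolved as ${HADOOP_CONF_DIR}/workers.txt
   bin/hadoop-daemons.sh --config /etc/hadoop/conf --hosts workers.txt start datanode
   # or name the hosts inline; the two forms are mutually exclusive
   bin/hadoop-daemons.sh --config /etc/hadoop/conf --hostnames "node1 node2" start datanode
   # HADOOP_HEAPSIZE (in MB) overrides the default -Xmx1000m
   HADOOP_HEAPSIZE=2000 bin/hadoop --config /etc/hadoop/conf version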

+ 167 - 0
common/bin/hadoop-daemon.sh

@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Runs a Hadoop command as a daemon.
+#
+# Environment Variables
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_LOG_DIR   Where log files are stored.  Defaults to ${HADOOP_HOME}/logs.
+#   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
+#   HADOOP_PID_DIR   Where the pid files are stored. /tmp by default.
+#   HADOOP_IDENT_STRING   A string representing this instance of hadoop. $USER by default
+#   HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
+##
+
+usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>"
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# get arguments
+
+#default value
+hadoopScript="$HADOOP_HOME"/bin/hadoop
+if [ "--script" = "$1" ]
+  then
+    shift
+    hadoopScript=$1
+    shift
+fi
+startStop=$1
+shift
+command=$1
+shift
+
+hadoop_rotate_log ()
+{
+    log=$1;
+    num=5;
+    if [ -n "$2" ]; then
+	num=$2
+    fi
+    if [ -f "$log" ]; then # rotate logs
+	while [ $num -gt 1 ]; do
+	    prev=`expr $num - 1`
+	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+	    num=$prev
+	done
+	mv "$log" "$log.$num";
+    fi
+}
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
+if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
+  export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
+  export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER   
+fi
+
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+  export HADOOP_IDENT_STRING="$USER"
+fi
+
+
+# get log directory
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  export HADOOP_LOG_DIR="$HADOOP_HOME/logs"
+fi
+mkdir -p "$HADOOP_LOG_DIR"
+chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
+
+if [ "$HADOOP_PID_DIR" = "" ]; then
+  HADOOP_PID_DIR=/tmp
+fi
+
+# some variables
+export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
+export HADOOP_ROOT_LOGGER="INFO,DRFA"
+log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
+pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
+
+# Set default scheduling priority
+if [ "$HADOOP_NICENESS" = "" ]; then
+    export HADOOP_NICENESS=0
+fi
+
+case $startStop in
+
+  (start)
+
+    mkdir -p "$HADOOP_PID_DIR"
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo $command running as process `cat $pid`.  Stop it first.
+        exit 1
+      fi
+    fi
+
+    if [ "$HADOOP_MASTER" != "" ]; then
+      echo rsync from $HADOOP_MASTER
+      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_HOME"
+    fi
+
+    hadoop_rotate_log $log
+    echo starting $command, logging to $log
+    cd "$HADOOP_HOME"
+    nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
+    echo $! > $pid
+    sleep 1; head "$log"
+    sleep 3;
+    if ! ps -p $! > /dev/null ; then
+      exit 1
+    fi
+    ;;
+          
+  (stop)
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo stopping $command
+        kill `cat $pid`
+      else
+        echo no $command to stop
+        exit 1
+      fi
+    else
+      echo no $command to stop
+      exit 1
+    fi
+    ;;
+
+  (*)
+    echo $usage
+    exit 1
+    ;;
+
+esac
+
+
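
An illustrative start/stop cycle (the conf directory is a placeholder; the daemon name is simply handed to bin/hadoop, or to whatever --script names):

   # forks the command via nohup/nice, rotates old .out files, writes a pid file
   bin/hadoop-daemon.sh --config /etc/hadoop/conf start namenode
   # stops it again using the pid recorded under HADOOP_PID_DIR
   bin/hadoop-daemon.sh --config /etc/hadoop/conf stop namenode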

+ 34 - 0
common/bin/hadoop-daemons.sh

@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a Hadoop command on all slave hosts.
+
+usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. $bin/hadoop-config.sh
+
+exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_HOME" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

+ 99 - 0
common/bin/rcc

@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The Hadoop record compiler
+#
+# Environment Variables
+#
+#   JAVA_HOME        The java implementation to use.
+#
+#   HADOOP_OPTS      Extra Java runtime options.
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
+fi
+if [ -d "$HADOOP_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
+fi
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
+fi
+for f in $HADOOP_HOME/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+
+CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
+
+# cygwin path translation
+if expr `uname` : 'CYGWIN*' > /dev/null; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+fi
+
+# run it
+exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

+ 65 - 0
common/bin/slaves.sh

@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a shell command on all slave hosts.
+#
+# Environment Variables
+#
+#   HADOOP_SLAVES    File naming remote hosts.
+#     Default is ${HADOOP_CONF_DIR}/slaves.
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#   HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+##
+
+usage="Usage: slaves.sh [--config confdir] command..."
+
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Determine which slave hosts to run on; see hadoop-config.sh
+# (it sets up these variables based on the command line options)
+if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
+  SLAVE_NAMES=$HADOOP_SLAVE_NAMES
+else
+  SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed  's/#.*$//;/^$/d')
+fi
+
+# start the daemons
+for slave in $SLAVE_NAMES ; do
+ ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
+   2>&1 | sed "s/^/$slave: /" &
+ if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
+   sleep $HADOOP_SLAVE_SLEEP
+ fi
+done
+
+wait
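
An illustrative run, assuming ${HADOOP_CONF_DIR}/slaves lists one host per line (hostnames and SSH options are placeholders):

   # run "uptime" on every slave host in parallel
   HADOOP_SSH_OPTS="-o ConnectTimeout=5" bin/slaves.sh --config /etc/hadoop/conf uptime
   # each output line is prefixed with the host it came from, e.g. "node1: ..."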

+ 36 - 0
common/bin/start-all.sh

@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# start hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# start mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh --config $HADOOP_CONF_DIR
+fi

+ 37 - 0
common/bin/stop-all.sh

@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# stop hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# stop mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh --config $HADOOP_CONF_DIR
+fi
+

+ 1769 - 0
common/build.xml

@@ -0,0 +1,1769 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="Hadoop-Common" default="compile" 
+   xmlns:ivy="antlib:org.apache.ivy.ant"
+   xmlns:artifact="urn:maven-artifact-ant"> 
+
+  <!-- Load all the default properties, and any the user wants    -->
+  <!-- to contribute (without having to type -D or edit this file -->
+  <property file="${user.home}/build.properties" />
+  <property file="${basedir}/build.properties" />
+ 
+  <property name="Name" value="Hadoop-common"/>
+  <property name="name" value="hadoop-common"/>
+  <property name="version" value="0.22.0-SNAPSHOT"/>
+  <property name="final.name" value="${name}-${version}"/>
+  <property name="test.final.name" value="${name}-test-${version}"/>
+  <property name="year" value="2009"/>
+
+  <property name="src.dir" value="${basedir}/src"/>  	
+  <property name="java.src.dir" value="${src.dir}/java"/>
+  <property name="native.src.dir" value="${basedir}/src/native"/>
+
+  <property name="lib.dir" value="${basedir}/lib"/>
+  <property name="conf.dir" value="${basedir}/conf"/>
+  <property name="contrib.dir" value="${basedir}/src/contrib"/>
+  <property name="docs.src" value="${basedir}/src/docs"/>
+  <property name="src.docs.cn" value="${basedir}/src/docs/cn"/>
+  <property name="changes.src" value="${docs.src}/changes"/>
+
+  <property name="build.dir" value="${basedir}/build"/>
+  <property name="build.classes" value="${build.dir}/classes"/>
+  <property name="build.src" value="${build.dir}/src"/>
+  <property name="build.webapps" value="${build.dir}/webapps"/>
+
+  <!-- convert spaces to _ so that mac os doesn't break things -->
+  <exec executable="tr" inputstring="${os.name}" 
+        outputproperty="nonspace.os">
+     <arg value="[:space:]"/>
+     <arg value="_"/>
+  </exec>
+  <property name="build.platform" 
+            value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
+  <property name="jvm.arch" 
+            value="${sun.arch.data.model}"/>
+  <property name="build.native" value="${build.dir}/native/${build.platform}"/>
+  <property name="build.docs" value="${build.dir}/docs"/>
+  <property name="build.docs.cn" value="${build.dir}/docs/cn"/>
+  <property name="build.javadoc" value="${build.docs}/api"/>
+  <property name="build.javadoc.timestamp" value="${build.javadoc}/index.html" />
+  <property name="build.javadoc.dev" value="${build.docs}/dev-api"/>
+  <property name="build.encoding" value="ISO-8859-1"/>
+  <property name="install.c++" value="${build.dir}/c++/${build.platform}"/>
+
+  <property name="test.src.dir" value="${basedir}/src/test"/>
+  <property name="test.build.dir" value="${build.dir}/test"/>
+  <property name="test.generated.dir" value="${test.build.dir}/src"/>
+  <property name="test.build.data" value="${test.build.dir}/data"/>
+  <property name="test.cache.data" value="${test.build.dir}/cache"/>
+  <property name="test.debug.data" value="${test.build.dir}/debug"/>
+  <property name="test.log.dir" value="${test.build.dir}/logs"/>
+  <property name="test.build.classes" value="${test.build.dir}/classes"/>
+  <property name="test.build.extraconf" value="${test.build.dir}/extraconf"/>
+  <property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
+  <property name="test.build.javadoc.dev" value="${test.build.dir}/docs/dev-api"/>
+  <property name="test.build.webapps" value="${build.dir}/test/webapps"/>
+  <property name="test.include" value="Test*"/>
+  <property name="test.classpath.id" value="test.classpath"/>
+  <property name="test.output" value="no"/>
+  <property name="test.timeout" value="900000"/>
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="test.junit.fork.mode" value="perTest" />
+  <property name="test.junit.printsummary" value="yes" />
+  <property name="test.junit.haltonfailure" value="no" />
+  <property name="test.junit.maxmemory" value="512m" />
+  <property name="test.conf.dir" value="${build.dir}/test/conf" />
+
+  <property name="test.core.build.classes" value="${test.build.dir}/core/classes"/>
+
+  <property name="test.all.tests.file" value="${test.src.dir}/all-tests"/>
+
+  <property name="javadoc.link.java"
+	    value="http://java.sun.com/javase/6/docs/api/"/>
+  <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+  <property name="javadoc.maxmemory" value="512m" />
+
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+
+  <property name="javac.debug" value="on"/>
+  <property name="javac.optimize" value="on"/>
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.version" value="1.6"/>
+  <property name="javac.args" value=""/>
+  <property name="javac.args.warnings" value="-Xlint:unchecked"/>
+
+  <property name="clover.db.dir" location="${build.dir}/test/clover/db"/>
+  <property name="clover.report.dir" location="${build.dir}/test/clover/reports"/>
+
+  <property name="rat.reporting.classname" value="rat.Report"/>
+
+  <property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
+  <property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
+  <property name="jdiff.stability" value="-unstable"/>
+  <property name="jdiff.compatibility" value=""/>
+  <property name="jdiff.stable" value="0.20.2"/>
+  <property name="jdiff.stable.javadoc" 
+            value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
+
+  <property name="scratch.dir" value="${user.home}/tmp"/>
+  <property name="svn.cmd" value="svn"/>
+  <property name="grep.cmd" value="grep"/>
+  <property name="patch.cmd" value="patch"/>
+  <property name="make.cmd" value="make"/>
+
+	
+  <!-- IVY properties set here -->
+  <property name="ivy.repo.dir" value="${user.home}/ivyrepo" />
+  <property name="ivy.dir" location="ivy" />
+  <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <property name="asfrepo" value="https://repository.apache.org"/> 
+  <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
+  <property name="asfstagingrepo"
+  value="${asfrepo}/service/local/staging/deploy/maven2"/>
+  <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+  <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+  <property name="ant_task.jar" location="${ivy.dir}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ant_task_repo_url" 
+     value="${mvnrepo}/org/apache/maven/maven-ant-tasks/${ant-task.version}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" />
+  <property name="ivy.org" value="org.apache.hadoop"/>
+  <property name="build.dir" location="build" />
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+  <property name="build.ivy.dir" location="${build.dir}/ivy" />
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report"/>
+  <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"/>
+  <property name="pom.xml" location="${build.ivy.maven.dir}/pom.xml"/>
+  <property name="hadoop-common.pom" location="${ivy.dir}/hadoop-common.xml"/>
+  <property name="build.ivy.maven.common.jar" location="${build.ivy.maven.dir}/hadoop-common-${version}.jar"/>
+  <property name="hadoop-common-test.pom" location="${ivy.dir}/hadoop-common-test.xml" />
+  <property name="build.ivy.maven.common-test.jar" location="${build.ivy.maven.dir}/hadoop-common-test-${version}.jar"/>
+
+  <!--this is the naming policy for artifacts we want pulled down-->
+  <property name="ivy.module" location="hadoop-common" />
+  <property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+
+  <!--this is how artifacts that get built are named-->
+  <property name="ivy.publish.pattern" value="[artifact]-[revision].[ext]"/>
+  <property name="hadoop-common.jar" location="${build.dir}/${final.name}.jar" />
+  <property name="hadoop-common-test.jar" location="${build.dir}/${test.final.name}.jar" />
+  <property name="hadoop-common-sources.jar" location="${build.dir}/${final.name}-sources.jar" />
+  <property name="hadoop-common-test-sources.jar" location="${build.dir}/${test.final.name}-sources.jar" />
+
+  <!-- jdiff.home property set -->
+  <property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
+  <property name="jdiff.jar" value="${jdiff.home}/jdiff-${jdiff.version}.jar"/>
+  <property name="xerces.jar" value="${jdiff.home}/xerces-${xerces.version}.jar"/>
+
+  <property name="clover.jar" location="${clover.home}/lib/clover.jar"/>
+  <available property="clover.present" file="${clover.jar}" />
+	
+  <!-- Eclipse properties -->
+  <property name="build.dir.eclipse" value="build/eclipse"/>
+  <property name="build.dir.eclipse-main-classes" value="${build.dir.eclipse}/classes-main"/>
+  <property name="build.dir.eclipse-test-classes" value="${build.dir.eclipse}/classes-test"/>
+  <property name="build.dir.eclipse-test-generated-classes" value="${build.dir.eclipse}/classes-test-generated"/>
+
+  <!-- check if clover reports should be generated -->
+  <condition property="clover.enabled">
+    <and>
+        <isset property="run.clover"/>
+        <isset property="clover.present"/>
+    </and>
+  </condition>
+
+  <condition property="staging">
+     <equals arg1="${repo}" arg2="staging"/>
+  </condition>
+
+<!-- the normal classpath -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${conf.dir}"/>
+    <path refid="ivy-common.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <pathelement location="${test.build.extraconf}"/>
+    <pathelement location="${test.core.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${test.build.dir}"/>
+    <pathelement location="${build.dir}"/>
+    <pathelement location="${build.examples}"/>
+    <pathelement location="${build.tools}"/>
+    <pathelement path="${clover.jar}"/>
+    <path refid="ivy-common.classpath"/>
+    <path refid="ivy-test.classpath"/>
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${test.conf.dir}"/>
+  </path>
+<!--
+  <path id="test.hdfs.classpath">
+    <pathelement location="${test.hdfs.build.classes}" />
+    <path refid="test.classpath"/>
+  </path>
+
+  <path id="test.mapred.classpath">
+    <pathelement location="${test.mapred.build.classes}" />
+    <path refid="test.hdfs.classpath"/>
+  </path>
+
+  <path id="test.hdfs.with.mr.classpath">
+    <pathelement location="${test.hdfs.with.mr.build.classes}" />
+    <path refid="test.mapred.classpath"/>
+  </path>
+-->
+  <!-- the cluster test classpath: uses conf.dir for configuration -->
+  <path id="test.cluster.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${test.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${build.dir}"/>
+  </path>
+
+
+  <!-- ====================================================== -->
+  <!-- Macro definitions                                      -->
+  <!-- ====================================================== -->
+  <macrodef name="macro_tar" description="Worker Macro for tar">
+    <attribute name="param.destfile"/>
+    <element name="param.listofitems"/>
+    <sequential>
+      <tar compression="gzip" longfile="gnu"
+      destfile="@{param.destfile}">
+      <param.listofitems/>
+      </tar>
+    </sequential>
+  </macrodef>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init" depends="ivy-retrieve-common">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.src}"/>
+    <mkdir dir="${build.webapps}"/>
+ 
+    <mkdir dir="${test.build.dir}"/>
+    <mkdir dir="${test.build.classes}"/>
+    <mkdir dir="${test.build.extraconf}"/>
+    <tempfile property="touch.temp.file" destDir="${java.io.tmpdir}"/>
+    <touch millis="0" file="${touch.temp.file}">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <fileset dir="${contrib.dir}" includes="**/*.template"/>
+    </touch>
+    <delete file="${touch.temp.file}"/>
+    <!-- copy all of the jsp and static files -->
+
+    <copy todir="${conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <mkdir dir="${test.conf.dir}"/>
+    <copy todir="${test.conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <copy todir="${contrib.dir}" verbose="true">
+      <fileset dir="${contrib.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <exec executable="sh">
+       <arg line="src/saveVersion.sh ${version} ${build.dir}"/>
+    </exec>
+	
+   <exec executable="sh">
+       <arg line="src/fixFontsPath.sh ${src.docs.cn}"/>
+   </exec>
+  </target>
+
+  <import file="${test.src.dir}/aop/build/aop.xml"/>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <target name="record-parser" depends="init" if="javacc.home">
+      <javacc
+          target="${java.src.dir}/org/apache/hadoop/record/compiler/generated/rcc.jj"
+          outputdirectory="${java.src.dir}/org/apache/hadoop/record/compiler/generated"
+          javacchome="${javacc.home}" />
+  </target>
+  
+  <target name="compile-rcc-compiler" depends="init, record-parser">
+    <javac 
+        encoding="${build.encoding}" 
+        srcdir="${java.src.dir}"
+        includes="org/apache/hadoop/record/compiler/**/*.java"
+        destdir="${build.classes}"
+        debug="${javac.debug}"
+        optimize="${javac.optimize}"
+        target="${javac.version}"
+        source="${javac.version}"
+        deprecation="${javac.deprecation}">
+        <compilerarg line="${javac.args}"/>
+        <classpath refid="classpath"/>
+    </javac>
+    
+    <taskdef name="recordcc" classname="org.apache.hadoop.record.compiler.ant.RccTask">
+      <classpath refid="classpath" />
+    </taskdef>
+  </target>
+  
+  <target name="compile-core-classes" depends="init, compile-rcc-compiler">
+    <!-- Compile Java files (excluding JSPs) checking warnings -->
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${java.src.dir};${build.src}"	
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="classpath"/>
+    </javac>
+
+    <copy todir="${build.classes}">
+      <fileset dir="${java.src.dir}" includes="**/*.properties"/>
+      <fileset dir="${java.src.dir}" includes="core-default.xml"/>
+    </copy>
+     
+  </target>
+
+  <target name="compile-native">
+    <antcall target="compile-core-native">
+      <param name="compile.native" value="true"/>
+    </antcall> 
+  </target>
+
+  <target name="compile-core-native" depends="compile-core-classes"
+          if="compile.native">
+  	
+    <mkdir dir="${build.native}/lib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/nativeio"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/security"/>
+
+  	<javah 
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/compress/zlib"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.compress.zlib.ZlibCompressor" />
+      <class name="org.apache.hadoop.io.compress.zlib.ZlibDecompressor" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsMapping" />
+  	</javah>
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/nativeio"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.nativeio.NativeIO" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping" />
+  	</javah>
+
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <env key="OS_NAME" value="${os.name}"/>
+	  <env key="OS_ARCH" value="${os.arch}"/>
+	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+	  <arg line="${native.src.dir}/configure"/>
+    </exec>
+
+    <exec dir="${build.native}" executable="${make.cmd}" failonerror="true">
+      <env key="OS_NAME" value="${os.name}"/>
+      <env key="OS_ARCH" value="${os.arch}"/>
+  	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+  	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+    </exec>
+
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <arg line="${build.native}/libtool --mode=install cp ${build.native}/libhadoop.la ${build.native}/lib"/>
+    </exec>
+
+  </target>
+
+  <target name="compile-core"
+          depends="clover,compile-core-classes,
+  	compile-core-native" 
+  	description="Compile core only">
+  </target>
+
+  <target name="compile-contrib" depends="compile-core">
+     <subant target="compile">
+        <property name="version" value="${version}"/>
+        <fileset file="${contrib.dir}/build.xml"/>
+     </subant>  	
+  </target>
+  
+  <target name="compile" depends="compile-core, compile-contrib" description="Compile core, contrib">
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-common.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile-core" description="Make hadoop-common.jar">
+    <tar compression="gzip" destfile="${build.classes}/bin.tgz">
+      <tarfileset dir="bin" mode="755"/>
+    </tar>
+    <property name="jar.properties.list" value="commons-logging.properties, log4j.properties, hadoop-metrics.properties" />
+    <jar jarfile="${build.dir}/${final.name}.jar"
+         basedir="${build.classes}">
+      <manifest>
+        <section name="org/apache/hadoop">
+          <attribute name="Implementation-Title" value="${ant.project.name}"/>
+          <attribute name="Implementation-Version" value="${version}"/>
+          <attribute name="Implementation-Vendor" value="Apache"/>
+        </section>
+      </manifest>
+      <fileset dir="${conf.dir}" includes="${jar.properties.list}" />
+      <fileset file="${jar.extra.properties.list}" />
+    </jar>
+
+    <jar jarfile="${hadoop-common-sources.jar}">
+      <fileset dir="${java.src.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${build.src}" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make the Hadoop metrics jar. (for use outside Hadoop)              -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="metrics.jar" depends="compile-core" description="Make the Hadoop metrics jar. (for use outside Hadoop)">
+    <jar jarfile="${build.dir}/hadoop-metrics-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics/**" />
+      <exclude name="**/package.html" />
+    </jar>
+  </target>
+
+  <target name="metrics2.jar" depends="compile-core" description="Make the Hadoop metrics2 framework jar (for use plugin development)">
+    <jar jarfile="${build.dir}/hadoop-metrics2-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics2/**" />
+    </jar>
+  </target>
+
+  <target name="generate-test-records" depends="compile-rcc-compiler">
+    <recordcc destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}"
+	         includes="**/*.jr" />
+    </recordcc>
+  </target>
+
+  <target name="generate-avro-records" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.SchemaTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avsc" />
+      </fileset>
+    </schema>
+  </target>
+
+  <target name="generate-avro-protocols" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.ProtocolTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avpr" />
+     </fileset>
+    </schema>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  --> 
+  <!-- ================================================================== -->
+  <!-- This is a wrapper for fault-injection needs-->
+  <target name="-classes-compilation"
+    depends="compile-core-classes, compile-core-test"/> 
+
+  <target name="compile-core-test" depends="compile-core-classes, ivy-retrieve-test, generate-test-records, generate-avro-records, generate-avro-protocols">
+    <mkdir dir="${test.core.build.classes}"/>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.generated.dir}"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args}"/>
+      <classpath refid="test.classpath"/>
+    </javac>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.src.dir}/core"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="test.classpath"/>
+     </javac>
+
+    <taskdef
+       name="paranamer" 
+       classname="com.thoughtworks.paranamer.ant.ParanamerGeneratorTask">
+      <classpath refid="classpath" />
+    </taskdef>
+    <paranamer sourceDirectory="${test.src.dir}/core"
+	       outputDirectory="${test.core.build.classes}"/>
+
+    <delete dir="${test.cache.data}"/>
+    <mkdir dir="${test.cache.data}"/>
+    <copy file="${test.src.dir}/core/org/apache/hadoop/cli/testConf.xml" todir="${test.cache.data}"/>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-test.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar-test" depends="compile-core-test" description="Make hadoop-test.jar">
+    <copy todir="${test.build.classes}">
+      <fileset dir="${test.core.build.classes}"/>
+    </copy>
+    <jar jarfile="${build.dir}/${test.final.name}.jar"
+         basedir="${test.build.classes}">
+         <manifest>
+           <attribute name="Main-Class"
+                      value="org/apache/hadoop/test/CoreTestDriver"/>
+          <section name="org/apache/hadoop">
+            <attribute name="Implementation-Title" value="${ant.project.name}"/>
+            <attribute name="Implementation-Version" value="${version}"/>
+            <attribute name="Implementation-Vendor" value="Apache"/>
+          </section>
+         </manifest>
+    </jar>
+
+    <jar jarfile="${hadoop-common-test-sources.jar}">
+      <fileset dir="${test.generated.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${test.src.dir}/core" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Fault injection customization section.
+       These targets ought to be copied over to other projects and modified
+       as needed -->
+  <!-- ================================================================== -->
+  <target name="run-test-core-fault-inject" depends="injectfaults" 
+	  description="Run full set of the unit tests with fault injection">
+    <macro-run-tests-fault-inject target.name="run-test-core"
+      testcasesonly="false"/>
+  </target>
+
+  <target name="jar-test-fault-inject" depends="injectfaults" 
+    description="Make hadoop-test-fi.jar">
+    <macro-jar-test-fault-inject
+      target.name="jar-test"
+      jar.final.name="test.final.name"
+      jar.final.value="${test.final.name}-fi" />
+  </target>
+
+  <target name="jar-fault-inject" depends="injectfaults" 
+    description="Make hadoop-fi.jar">
+    <macro-jar-fault-inject
+      target.name="jar"
+      build.dir="${build-fi.dir}"
+      jar.final.name="final.name"
+      jar.final.value="${final.name}-fi" />
+  </target>
+
+  <!--This target is not included in the top-level list of targets because
+  it serves a special "regression" purpose: running non-FI tests in an
+  FI environment -->
+  <target name="run-fault-inject-with-testcaseonly" depends="injectfaults">
+    <fail unless="testcase">Can't run this target without -Dtestcase setting!
+    </fail>
+    <macro-run-tests-fault-inject target.name="run-test-core" 
+      testcasesonly="true"/>
+  </target>
+  <!-- ================================================================== -->
+  <!-- End of Fault injection customization section                       -->
+  <!-- ================================================================== -->
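
For orientation, a hedged sketch of how the fault-injection targets above are typically driven from the command line; the test class name is a placeholder, and the injectfaults dependency is assumed to be defined earlier in this build file:

    # Placeholder test name; any core test class can be substituted.
    ant run-test-core-fault-inject                                  # full unit-test run with faults injected
    ant jar-test-fault-inject                                       # build hadoop-test-fi.jar
    ant jar-fault-inject                                            # build hadoop-fi.jar
    ant run-fault-inject-with-testcaseonly -Dtestcase=TestSomeClass # run a non-FI test in the FI environment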
+
+  <condition property="tests.notestcase">
+    <and>
+      <isfalse value="${test.fault.inject}"/>
+      <not>
+        <isset property="testcase"/>
+      </not>
+    </and>
+  </condition>
+  <condition property="tests.notestcase.fi">
+    <and>
+      <not>
+        <isset property="testcase" />
+      </not>
+      <istrue value="${test.fault.inject}" />
+    </and>
+  </condition>
+  <condition property="tests.testcase">
+    <and>
+      <isfalse value="${test.fault.inject}" />
+      <isset property="testcase" />
+    </and>
+  </condition>
+  <condition property="tests.testcaseonly">
+    <istrue value="${special.fi.testcasesonly}" />
+  </condition>
+  <condition property="tests.testcase.fi">
+    <and>
+      <istrue value="${test.fault.inject}" />
+      <isset property="testcase" />
+      <isfalse value="${special.fi.testcasesonly}" />
+    </and>
+  </condition>
+	     
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <macrodef name="macro-test-runner">
+    <attribute name="test.file" />
+    <attribute name="classpath" />
+    <attribute name="test.dir" />
+    <attribute name="fileset.dir" />
+    <attribute name="hadoop.conf.dir.deployed" default="" />
+    <attribute name="test.krb5.conf.filename" default="" />
+    <sequential>
+      <delete file="${test.build.dir}/testsfailed"/>
+      <delete dir="@{test.dir}/data" />
+      <mkdir dir="@{test.dir}/data" />
+      <delete dir="${test.build.webapps}"/>
+      <copy todir="${test.build.webapps}">
+        <fileset dir="${test.src.dir}/test-webapps" includes="**/*" />
+      </copy>
+      <delete dir="@{test.dir}/logs" />
+      <mkdir dir="@{test.dir}/logs" />
+      <copy file="${test.src.dir}/hadoop-policy.xml"
+            todir="@{test.dir}/extraconf" />
+      <copy file="${test.src.dir}/fi-site.xml"
+            todir="@{test.dir}/extraconf" />
+      <junit showoutput="${test.output}"
+             printsummary="${test.junit.printsummary}"
+             haltonfailure="${test.junit.haltonfailure}"
+             fork="yes"
+             forkmode="${test.junit.fork.mode}"
+             maxmemory="${test.junit.maxmemory}"
+             dir="${basedir}"
+             timeout="${test.timeout}"
+             errorProperty="tests.failed"
+             failureProperty="tests.failed">
+        <jvmarg value="-ea" />
+        <sysproperty key="test.build.data" value="${test.build.data}" />
+        <sysproperty key="test.cache.data" value="${test.cache.data}" />
+        <sysproperty key="test.debug.data" value="${test.debug.data}" />
+        <sysproperty key="hadoop.log.dir" value="${test.log.dir}" />
+        <sysproperty key="test.src.dir" value="${test.src.dir}" />
+        <sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
+         <sysproperty key="java.security.krb5.conf" value="@{test.krb5.conf.filename}"/>
+        <sysproperty key="hadoop.policy.file" value="hadoop-policy.xml" />
+        <sysproperty key="java.library.path"
+          value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
+        <sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
+        <!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="io.compression.codec.lzo.class"/>
+        </syspropertyset>
+        <!-- set compile.c++ in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="compile.c++"/>
+        </syspropertyset>
+        <classpath refid="@{classpath}" />
+        <!-- Pass probability specifications to the spawned JVM -->
+        <syspropertyset id="FaultProbabilityProperties">
+          <propertyref regex="fi.*"/>
+        </syspropertyset>
+        <sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
+                     value="@{hadoop.conf.dir.deployed}" />
+        <!-- user to group mapping class for TestAccessControlList -->
+        <syspropertyset dynamic="no">
+          <propertyref name="TestAccessControlListGroupMapping"/>
+        </syspropertyset>
+        <formatter type="${test.junit.output.format}" />
+        <batchtest todir="@{test.dir}" if="tests.notestcase">
+          <fileset dir="@{fileset.dir}/core"
+                   excludes="**/${test.exclude}.java aop/** system/**">
+             <patternset>
+               <includesfile name="@{test.file}"/>
+             </patternset>
+         </fileset>
+        </batchtest>
+        <batchtest todir="${test.build.dir}" if="tests.notestcase.fi">
+          <fileset dir="@{fileset.dir}/aop"
+                   includes="**/${test.include}.java"
+                   excludes="**/${test.exclude}.java" />
+         </batchtest>
+         <batchtest todir="@{test.dir}" if="tests.testcase">
+           <fileset dir="@{fileset.dir}/core"
+             includes="**/${testcase}.java" excludes="aop/** system/**"/>
+         </batchtest>
+         <batchtest todir="${test.build.dir}" if="tests.testcase.fi">
+           <fileset dir="@{fileset.dir}/aop" includes="**/${testcase}.java" />
+         </batchtest>
+         <!--The following batch is for the special case when non-FI tests
+                need to be executed against an FI environment -->
+         <batchtest todir="${test.build.dir}" if="tests.testcaseonly">
+           <fileset dir="@{fileset.dir}/core" includes="**/${testcase}.java" />
+         </batchtest>
+      </junit>
+      <antcall target="checkfailure"/>
+    </sequential>
+  </macrodef>
+
+  <target name="run-test-core" depends="compile-core-test" description="Run core unit tests">
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>   
+
+  <target name="checkfailure" if="tests.failed">
+    <touch file="${test.build.dir}/testsfailed"/>
+    <fail unless="continueOnFailure">Tests failed!</fail>
+  </target>
+
+  <target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
+    <subant target="test">
+       <property name="version" value="${version}"/>
+       <property name="clover.jar" value="${clover.jar}"/>
+       <fileset file="${contrib.dir}/build.xml"/>
+    </subant> 
+  </target>
+
+  <target name="test-core" description="Run core unit tests"
+          depends="run-test-core"/>
+
+  <target name="test-fi" description="Run fi unit tests"
+          depends="run-test-core-fault-inject"/>
+
+  <target name="test" depends="jar-test" description="Run all unit tests">
+    <delete file="${test.build.dir}/testsfailed"/> 
+    <property name="continueOnFailure" value="true"/> 
+    <antcall target="run-test-core"/>
+    <antcall target="run-test-core-fault-inject"/>
+    <subant target="test-contrib">	 
+      <fileset dir="." includes="build.xml"/>
+    </subant>
+    <available file="${test.build.dir}/testsfailed" property="testsfailed"/>
+    <fail if="testsfailed">Tests failed!</fail>
+  </target>
+
+  <!-- Run all unit tests, not just Test*, and use non-test configuration. -->
+  <target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
+    <antcall target="test">
+      <param name="test.include" value="*"/>
+      <param name="test.classpath.id" value="test.cluster.classpath"/>
+    </antcall>
+  </target>
+
+  <target name="nightly" depends="test, tar">
+  </target>
+	
+  <!-- ================================================================== -->
+  <!-- Run optional third-party tool targets                              --> 
+  <!-- ================================================================== -->
+  <target name="checkstyle" depends="ivy-retrieve-checkstyle,check-for-checkstyle" if="checkstyle.present" 
+       description="Run optional third-party tool targets">
+       <taskdef resource="checkstyletask.properties">
+         <classpath refid="checkstyle-classpath"/>
+       </taskdef>
+  
+	<mkdir dir="${test.build.dir}"/>
+  	
+  	<checkstyle config="${test.src.dir}/checkstyle.xml"
+  		failOnViolation="false">
+      <fileset dir="${java.src.dir}" includes="**/*.java" excludes="**/generated/**"/>
+      <formatter type="xml" toFile="${test.build.dir}/checkstyle-errors.xml"/>
+  	</checkstyle>
+  	
+  	<xslt style="${test.src.dir}/checkstyle-noframes-sorted.xsl"
+        in="${test.build.dir}/checkstyle-errors.xml"
+        out="${test.build.dir}/checkstyle-errors.html"/>
+  </target>
+	
+  <target name="check-for-checkstyle">
+    <available property="checkstyle.present" resource="checkstyletask.properties">
+       <classpath refid="checkstyle-classpath"/>
+    </available>  	
+  </target>
+
+
+ <property name="findbugs.home" value=""/>
+  <target name="findbugs" depends="check-for-findbugs, jar" if="findbugs.present" description="Run findbugs if present">
+    <property environment="env"/>
+    <property name="findbugs.out.dir" value="${test.build.dir}/findbugs"/>
+    <property name="findbugs.exclude.file" value="${test.src.dir}/findbugsExcludeFile.xml"/>
+    <property name="findbugs.report.htmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.html"/>
+    <property name="findbugs.report.xmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.xml"/>
+    <taskdef name="findbugs" classname="edu.umd.cs.findbugs.anttask.FindBugsTask"
+        classpath="${findbugs.home}/lib/findbugs-ant.jar" />
+
+        <mkdir dir="${findbugs.out.dir}"/>
+
+    <findbugs home="${findbugs.home}" output="xml:withMessages"
+        outputFile="${findbugs.report.xmlfile}" effort="max"
+        excludeFilter="${findbugs.exclude.file}" jvmargs="-Xmx512M">
+      <auxClasspath>
+        <fileset dir="${env.ANT_HOME}/lib">
+          <include name="ant.jar"/>
+          <include name="ant-launcher.jar"/>
+        </fileset>
+        <fileset dir="${build.ivy.lib.dir}/${ant.project.name}/common">
+          <include name="**/*.jar"/>
+        </fileset>
+      </auxClasspath>
+      <sourcePath path="${java.src.dir}"/>
+      <class location="${basedir}/build/${final.name}.jar" />
+    </findbugs>
+
+        <xslt style="${findbugs.home}/src/xsl/default.xsl"
+        in="${findbugs.report.xmlfile}"
+        out="${findbugs.report.htmlfile}"/>
+  </target>
+	
+  <target name="check-for-findbugs">
+    <available property="findbugs.present"
+        file="${findbugs.home}/lib/findbugs.jar" />
+  </target>
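
A hedged example of running the findbugs target; the installation path is an assumption, and findbugs.home must point at a directory containing lib/findbugs.jar for check-for-findbugs to find it:

    # /opt/findbugs is a placeholder path to a local FindBugs installation.
    ant findbugs -Dfindbugs.home=/opt/findbugs
    # Reports are written under ${test.build.dir}/findbugs as hadoop-findbugs-report.xml/.html.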
+
+
+  <!-- ================================================================== -->
+  <!-- Documentation                                                      -->
+  <!-- ================================================================== -->
+  
+  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. 
+       To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+    <exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
+	  failonerror="true">
+      <env key="JAVA_HOME" value="${java5.home}"/>
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${docs.src}/build/site/" />
+    </copy>
+    <copy file="${docs.src}/releasenotes.html" todir="${build.docs}"/>
+    <style basedir="${java.src.dir}" destdir="${build.docs}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+    <antcall target="cn-docs"/>
+  </target>
+
+  <target name="cn-docs" depends="forrest.check, init" description="Generate forrest-based Chinese documentation. 
+        To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." 
+        if="forrest.home">
+    <exec dir="${src.docs.cn}" executable="${forrest.home}/bin/forrest" failonerror="true">
+      <env key="LANG" value="en_US.utf8"/>
+      <env key="JAVA_HOME" value="${java5.home}"/>
+    </exec>
+    <copy todir="${build.docs.cn}">
+      <fileset dir="${src.docs.cn}/build/site/" />
+    </copy>
+    <style basedir="${java.src.dir}" destdir="${build.docs.cn}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+  </target>
+
+  <target name="forrest.check" unless="forrest.home" depends="java5.check">
+    <fail message="'forrest.home' is not defined. Please pass 
+      -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
+  </target>
+
+  <target name="java5.check" unless="java5.home">
+    <fail message="'java5.home' is not defined.  Forrest requires Java 5.  
+       Please pass -Djava5.home=&lt;base of Java 5 distribution&gt; to Ant on the command-line." />
+  </target>
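
A minimal sketch of building the Forrest documentation, assuming local Apache Forrest and JDK 5 installations at placeholder paths:

    # Both paths are placeholders for local installations.
    ant docs -Dforrest.home=/opt/apache-forrest -Djava5.home=/usr/lib/jvm/java-1.5.0-sun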
+	
+  <target name="javadoc-dev" depends="compile, ivy-retrieve-javadoc" description="Generate javadoc for hadoop developers">
+    <mkdir dir="${build.javadoc.dev}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc.dev}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} Developer API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+	<packageset dir="src/contrib/failmon/src/java/"/> 
+
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <fileset dir="src/contrib/">
+            <include name="*/lib/*.jar" />
+          </fileset>
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+    	<group title="Core" packages="org.apache.*"/>
+        <group title="contrib: FailMon" packages="org.apache.hadoop.contrib.failmon*"/>
+
+    </javadoc>
+  </target>	
+
+  <target name="javadoc-uptodate" depends="compile, ivy-retrieve-javadoc">
+    <uptodate property="javadoc.is.uptodate">
+      <srcfiles dir="${src.dir}">
+        <include name="**/*.java" />
+        <include name="**/*.html" />
+      </srcfiles>
+      <mapper type="merge" to="${build.javadoc.timestamp}" />
+    </uptodate>
+  </target>
+ 
+  <target name="javadoc" description="Generate javadoc" depends="jar, javadoc-uptodate"
+       unless="javadoc.is.uptodate">
+    <mkdir dir="${build.javadoc}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+	<packageset dir="src/contrib/failmon/src/java/"/> 
+	
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <fileset dir="src/contrib/">
+            <include name="*/lib/*.jar" />
+          </fileset>
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+       <group title="Core" packages="org.apache.*"/>
+       <group title="contrib: FailMon" packages="org.apache.hadoop.contrib.failmon*"/>
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsStandardDoclet"
+               path="${build.dir}/${final.name}.jar"/>
+    </javadoc>
+  </target>	
+
+  <target name="api-xml" depends="ivy-retrieve-jdiff,javadoc,write-null">
+    <javadoc maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+               path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-apidir" value="${jdiff.xml.dir}"/>
+         <param name="-apiname" value="hadoop-core ${version}"/>
+         <param name="${jdiff.stability}"/>
+       </doclet>
+       <packageset dir="src/java"/>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath" />
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="write-null">
+	<exec executable="touch">
+	   <arg value="${jdiff.home}/Null.java"/>
+        </exec>
+  </target> 
+
+  <target name="api-report" depends="ivy-retrieve-jdiff,api-xml">
+    <mkdir dir="${jdiff.build.dir}"/>
+    <javadoc sourcepath="src/java"
+             destdir="${jdiff.build.dir}"
+	     sourceFiles="${jdiff.home}/Null.java"
+	     maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+              path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-oldapi" value="hadoop-core ${jdiff.stable}"/>
+         <param name="-newapi" value="hadoop-core ${version}"/>
+         <param name="-oldapidir" value="${jdiff.xml.dir}"/>
+         <param name="-newapidir" value="${jdiff.xml.dir}"/>
+         <param name="-javadocold" value="${jdiff.stable.javadoc}"/>
+         <param name="-javadocnew" value="../../api/"/>
+         <param name="-stats"/>
+         <param name="${jdiff.stability}"/>
+         <param name="${jdiff.compatibility}"/>
+       </doclet>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath"/>
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="changes-to-html" description="Convert CHANGES.txt into an html file">
+    <mkdir dir="${build.docs}"/>
+    <exec executable="perl" input="CHANGES.txt" output="${build.docs}/changes.html" failonerror="true">
+      <arg value="${changes.src}/changes2html.pl"/>
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${changes.src}" includes="*.css"/>
+    </copy>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- D I S T R I B U T I O N                                            -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="package" depends="compile, jar, javadoc, docs, api-report, jar-test"
+	  description="Build distribution">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/contrib"/>
+    <mkdir dir="${dist.dir}/bin"/>
+    <mkdir dir="${dist.dir}/docs"/>
+    <mkdir dir="${dist.dir}/docs/api"/>
+    <mkdir dir="${dist.dir}/docs/jdiff"/>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <subant target="package">
+      <!--Pass down the version in case it's needed again, and the target
+      distribution directory so contribs know where to install.-->
+      <property name="version" value="${version}"/>
+      <property name="dist.dir" value="${dist.dir}"/>
+      <fileset file="${contrib.dir}/build.xml"/>
+    </subant>  	
+
+    <copy todir="${dist.dir}"> 
+      <fileset file="${build.dir}/${final.name}.jar"/>
+      <fileset file="${build.dir}/${test.final.name}.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin"/>
+    </copy>
+
+    <copy todir="${dist.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy todir="${dist.dir}/docs">
+      <fileset dir="${build.docs}"/>
+    </copy>
+
+    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+
+    <copy todir="${dist.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${dist.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/src" includeEmptyDirs="true">
+      <fileset dir="src" excludes="**/*.template **/docs/build/**/*"/>
+    </copy>
+  	
+    <copy todir="${dist.dir}/" file="build.xml"/>
+
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+        <fileset dir="${dist.dir}/src/contrib/">
+          <include name="*/bin/*" />
+        </fileset>
+        <fileset dir="${dist.dir}/src/contrib/ec2/bin/image"/>
+    </chmod>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make release tarball                                               -->
+  <!-- ================================================================== -->
+  <target name="tar" depends="package" description="Make release tarball">
+    <macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/contrib/*/bin/*" />
+          <exclude name="${final.name}/src/contrib/ec2/bin/*" />
+          <exclude name="${final.name}/src/contrib/ec2/bin/image/*" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+          <include name="${final.name}/contrib/*/bin/*" />
+          <include name="${final.name}/src/contrib/ec2/bin/*" />
+          <include name="${final.name}/src/contrib/ec2/bin/image/*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
+
+  <target name="bin-package" depends="compile, jar, jar-test" 
+		description="assembles artifacts for binary target">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/contrib"/>
+    <mkdir dir="${dist.dir}/bin"/>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <subant target="package">
+      <!--Pass down the version in case it's needed again, and the target
+      distribution directory so contribs know where to install.-->
+      <property name="version" value="${version}"/>
+      <property name="dist.dir" value="${dist.dir}"/>
+      <fileset file="${contrib.dir}/build.xml"/>
+    </subant>  	
+
+    <copy todir="${dist.dir}"> 
+      <fileset file="${build.dir}/${final.name}.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin"/>
+    </copy>
+
+    <copy todir="${dist.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+
+    <copy todir="${dist.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${dist.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/" file="build.xml"/>
+
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+    </chmod>
+  </target>
+
+  <target name="binary" depends="bin-package" description="Make tarball without source and documentation">
+    <macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/src/**" />
+          <exclude name="${final.name}/docs/**" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
+  
+  <target name="ant-task-download" description="To download mvn-ant-task" unless="offline">
+    <get src="${ant_task_repo_url}" dest="${ant_task.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="mvn-taskdef" depends="ant-task-download">
+     <path id="mvn-ant-task.classpath" path="${ant_task.jar}"/> 
+     <typedef resource="org/apache/maven/artifact/ant/antlib.xml" 
+         uri="urn:maven-artifact-ant"
+         classpathref="mvn-ant-task.classpath"/>
+  </target>   
+
+  <target name="mvn-install" depends="mvn-taskdef,jar,jar-test,set-version"
+    description="Install hadoop common and test jars to local fs m2 repo">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:install file="${hadoop-common.jar}">
+        <pom refid="hadoop.core"/>
+	<attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:install>
+     <artifact:install file="${hadoop-common-test.jar}">
+        <pom refid="hadoop.core.test"/>
+	<attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:install>
+  </target>
+
+  <target name="mvn-si-install" depends="mvn-install,-mvn-system-install"
+     description="Install system integration test jars as well"/>
+
+  <target name="mvn-deploy" depends="mvn-taskdef, jar, jar-test,
+     jar-system, set-version, signanddeploy, simpledeploy"
+     description="To deploy hadoop common and test jar's to apache
+     snapshot's repository"/>
+
+  <target name="signanddeploy" if="staging" depends="sign">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+     <artifact:install-provider artifactId="wagon-http"
+     version="${wagon-http.version}"/>
+
+     <artifact:deploy file="${hadoop-common.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core"/>
+       <attach file="${hadoop-common.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.test"/>
+       <attach file="${hadoop-common-test.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-test.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-test-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-test-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.${herriot.suffix}"/>
+       <attach file="${hadoop-common-instrumented.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-instrumented.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}.asc" 
+         type="jar.asc" classifier="sources"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}"
+         classifier="sources"/>
+     </artifact:deploy>
+  </target>
+
+  <target name="sign" depends="clean-sign" if="staging">
+    <input message="password:>" addproperty="gpg.passphrase">
+     <handler classname="org.apache.tools.ant.input.SecureInputHandler" />
+    </input>
+    <macrodef name="sign-artifact" description="Signs the artifact">
+      <attribute name="input.file"/>
+      <attribute name="output.file" default="@{input.file}.asc"/>
+      <attribute name="gpg.passphrase"/>
+      <sequential>
+        <echo>Signing @{input.file} Sig File: @{output.file}</echo>
+        <exec executable="gpg" >
+          <arg value="--armor"/>
+          <arg value="--output"/>
+          <arg value="@{output.file}"/>
+          <arg value="--passphrase"/>
+          <arg value="@{gpg.passphrase}"/>
+          <arg value="--detach-sig"/>
+          <arg value="@{input.file}"/>
+        </exec>
+      </sequential>
+    </macrodef>
+    <sign-artifact input.file="${hadoop-common.jar}" 
+     output.file="${hadoop-common.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.jar}" 
+     output.file="${hadoop-common-test.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-sources.jar}" 
+     output.file="${hadoop-common-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test-sources.jar}" 
+     output.file="${hadoop-common-test-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common.pom}" 
+     output.file="${hadoop-common.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.pom}" 
+     output.file="${hadoop-common-test.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.jar}" 
+     output.file="${hadoop-common-instrumented.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.pom}" 
+     output.file="${hadoop-common-instrumented.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented-sources.jar}" 
+     output.file="${hadoop-common-instrumented-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+  </target>
+
+  <target name="simpledeploy" unless="staging">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+
+     <artifact:install-provider artifactId="wagon-http" version="${wagon-http.version}"/>
+     <artifact:deploy file="${hadoop-common.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core"/>
+	 <attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.test"/>
+	 <attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:deploy> 
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.${herriot.suffix}"/>
+         <attach file="${hadoop-common-instrumented-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+  </target>
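
A hedged sketch of the two deployment paths above; setting the staging property switches from the plain snapshot deploy to the signed staging deploy, which prompts for a GPG passphrase:

    ant mvn-deploy                  # unsigned deploy to the Apache snapshots repository
    ant mvn-deploy -Dstaging=true   # signed deploy to the staging repository (prompts for the GPG passphrase)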
+
+  <target name="set-version">
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-template.xml" tofile="${basedir}/ivy/hadoop-common.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-test-template.xml" tofile="${basedir}/ivy/hadoop-common-test.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-${herriot.suffix}-template.xml"
+      tofile="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <replaceregexp byline="true">
+      <regexp pattern="@version"/>
+      <substitution expression="${version}"/>
+      <fileset dir="${basedir}/ivy">
+        <include name="hadoop-common.xml"/>
+        <include name="hadoop-common-test.xml"/>
+        <include name="hadoop-common-${herriot.suffix}.xml"/>
+      </fileset>
+    </replaceregexp>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Perform audit activities for the release                           -->
+  <!-- ================================================================== -->
+  <target name="rats-taskdef" depends="ivy-retrieve-releaseaudit">
+     <typedef format="xml" resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks"
+      classpathref="releaseaudit-classpath"/>
+  </target>
+
+  <target name="releaseaudit" depends="package, rats-taskdef" description="Release Audit activities">
+   <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+      <fileset dir="${dist.dir}">
+        <exclude name="**/CHANGES.txt"/>
+        <exclude name="**/conf/*"/>
+        <exclude name="**/docs/"/>
+        <exclude name="lib/jdiff/"/>
+        <exclude name="**/native/*"/>
+        <exclude name="**/native/config/*"/>
+        <exclude name="**/VERSION"/>
+        <exclude name="**/*.json"/>
+        <exclude name="**/hod/*.txt"/>
+      </fileset>
+    </rat:report>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <target name="clean" depends="clean-contrib, clean-sign, clean-fi" description="Clean.  Delete the build files, and their directories">
+    <delete dir="${build.dir}"/>
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <delete dir="${docs.src}/build"/>
+    <delete dir="${src.docs.cn}/build"/>
+  </target>
+
+  <target name="clean-sign" description="Clean.  Delete .asc files">
+    <delete>
+      <fileset dir="." includes="**/**/*.asc"/>
+    </delete>
+  </target>  
+
+  <target name="veryclean" depends="clean" description="Delete mvn ant task jar and ivy ant taks jar">
+    <delete>
+      <fileset dir="${ivy.dir}" includes="*.jar"/>
+    </delete>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean contrib target. For now, must be called explicitly           -->
+  <!-- Using subant instead of ant as a workaround for 30569              -->
+  <!-- ================================================================== -->
+  <target name="clean-contrib">
+     <subant target="clean">        
+        <fileset file="src/contrib/build.xml"/>
+     </subant>  	
+  </target>
+	
+ <target name="clover" depends="clover.setup, clover.info" description="Instrument the Unit tests using Clover. 
+     To use, specify -Dclover.home=&lt;base of clover installation&gt; -Drun.clover=true on the command line."/>
+
+<target name="clover.setup" if="clover.enabled">
+   <taskdef resource="cloverlib.xml" classpath="${clover.jar}"/>
+   <mkdir dir="${clover.db.dir}"/>
+   <clover-setup initString="${clover.db.dir}/hadoop_coverage.db">
+     <fileset dir="${src.dir}" includes="java/**/*"/>
+     <testsources dir="${test.src.dir}"/>
+   </clover-setup>
+</target>
+
+<target name="clover.info" unless="clover.present">
+  <echo>
+     Clover not found. Code coverage reports disabled.
+  </echo>
+</target>
+
+<target name="clover.check">
+  <fail unless="clover.present">
+  ##################################################################
+   Clover not found.
+   Please specify -Dclover.home=&lt;base of clover installation&gt;
+   on the command line.
+  ##################################################################
+  </fail>
+</target>
+
+<target name="generate-clover-reports" depends="clover.check, clover">
+  <mkdir dir="${clover.report.dir}"/>
+  <clover-report>
+     <current outfile="${clover.report.dir}" title="${final.name}">
+     <format type="html"/>
+     </current>
+  </clover-report>
+  <clover-report>
+     <current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
+     <format type="xml"/>
+     </current>
+  </clover-report>
+</target>
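
A hedged example of a Clover-instrumented test run, assuming -Drun.clover=true wires up the clover.enabled and clover.present properties defined elsewhere in this build file and that the Clover jar is resolved from clover.home:

    # /opt/clover is a placeholder path to a local Clover installation.
    ant test generate-clover-reports -Dclover.home=/opt/clover -Drun.clover=true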
+
+<target name="findbugs.check" depends="check-for-findbugs" unless="findbugs.present">
+  <fail message="'findbugs.home' is not defined. Please pass -Dfindbugs.home=&lt;base of Findbugs installation&gt; to Ant on the command-line." />
+</target>
+
+<target name="patch.check" unless="patch.file">
+  <fail message="'patch.file' is not defined. Please pass -Dpatch.file=&lt;location of patch file&gt; to Ant on the command-line." />
+</target>
+
+<target name="test-patch" depends="patch.check,findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="DEVELOPER"/>
+    <arg value="${patch.file}"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${basedir}"/>
+    <arg value="${java5.home}"/>
+  </exec>
+</target>
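
A hedged sketch of a developer test-patch run; the patch path and tool locations below are placeholders, and the remaining arguments (scratch.dir, svn.cmd, grep.cmd, patch.cmd) are assumed to take their defaults from elsewhere in the build:

    # All paths are placeholders.
    ant test-patch -Dpatch.file=/tmp/my-change.patch \
        -Dfindbugs.home=/opt/findbugs -Dforrest.home=/opt/apache-forrest \
        -Djava5.home=/usr/lib/jvm/java-1.5.0-sun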
+
+<target name="hudson-test-patch" depends="findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="HUDSON"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${support.dir}"/>
+    <arg value="${ps.cmd}"/>
+    <arg value="${wget.cmd}"/>
+    <arg value="${jiracli.cmd}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${eclipse.home}"/>
+    <arg value="${python.home}"/>
+    <arg value="${basedir}"/>
+    <arg value="${jira.passwd}"/>
+    <arg value="${java5.home}"/>
+    <arg value="${curl.cmd}"/>
+    <arg value="${defect}"/>
+  </exec>
+</target>
+	
+  <condition property="ant-eclipse.jar.exists">
+    <available file="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar"/>
+  </condition>
+
+  <target name="ant-eclipse-download" unless="ant-eclipse.jar.exists"
+          description="Downloads the ant-eclipse binary.">
+    <get src="http://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2"
+         dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
+
+    <untar src="${build.dir}/ant-eclipse-1.0.bin.tar.bz2"
+           dest="${build.dir}" compression="bzip2">
+      <patternset>
+        <include name="lib/ant-eclipse-1.0-jvm1.2.jar"/>
+      </patternset>
+    </untar>
+    <delete file="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" />
+  </target>
+
+  <target name="eclipse" 
+          depends="init,ant-eclipse-download,ivy-retrieve-common,ivy-retrieve-test,compile-core-test"
+          description="Create eclipse project files">
+	     <pathconvert property="eclipse.project">
+	       <path path="${basedir}"/>
+	       <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/>
+	     </pathconvert>
+    <taskdef name="eclipse"
+             classname="prantl.ant.eclipse.EclipseTask"
+             classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
+    <eclipse updatealways="true">
+      <project name="${eclipse.project}" />
+      <classpath>
+        <source path="${java.src.dir}"
+                output="${build.dir.eclipse-main-classes}" />
+        <source path="${test.src.dir}/core"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.src.dir}/aop"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.generated.dir}"
+                output="${build.dir.eclipse-test-generated-classes}" />
+        <output path="${build.dir.eclipse-main-classes}" />
+        <library pathref="ivy-common.classpath" exported="true" />
+        <library pathref="ivy-test.classpath" exported="false" />
+        <variable path="ANT_HOME/lib/ant.jar" exported="false" />
+        <library path="${conf.dir}" exported="false" />
+      </classpath>
+    </eclipse>
+  </target>
+
+  <target name="ivy-init-dirs">
+    <mkdir dir="${build.ivy.dir}" />
+    <mkdir dir="${build.ivy.lib.dir}" />
+    <mkdir dir="${build.ivy.report.dir}" />
+    <mkdir dir="${build.ivy.maven.dir}" />
+  </target>
+
+  <target name="ivy-probe-antlib" >
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+    </condition>
+  </target>
+
+  <target name="ivy-download" description="To download ivy" unless="offline">
+    <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+  </target>
+
+  <!--
+  To avoid Ivy leaking things across big projects, always load Ivy in the same classloader.
+  Also note how we skip loading Ivy if it is already there, just to make sure all is well.
+  -->
+  <target name="ivy-init-antlib" depends="ivy-download,ivy-init-dirs,ivy-probe-antlib" unless="ivy.found">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
+      loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <fail >
+      <condition >
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy_repo_url}
+    </fail>
+  </target>
+
+  <property name="ivyresolvelog" value="download-only"/>
+  <property name="ivyretrievelog" value="quiet"/>
+
+  <target name="ivy-init" depends="ivy-init-antlib" >
+
+    <!--Configure Ivy by reading in the settings file.
+        If a settings file has already been read into this settings ID, that one takes priority.
+    -->
+    <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}" override='false'
+      realm="Sonatype Nexus Repository Manager"/>
+
+  </target>
+
+  <target name="ivy-resolve" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-javadoc" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="javadoc"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-releaseaudit" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="releaseaudit"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-test" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-common" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-jdiff" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="jdiff"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-checkstyle" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="checkstyle"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-retrieve" depends="ivy-resolve"
+    description="Retrieve Ivy-managed artifacts">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+  </target>
+
+  <target name="ivy-retrieve-checkstyle" depends="ivy-resolve-checkstyle"
+    description="Retrieve Ivy-managed artifacts for the checkstyle configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="checkstyle-classpath" conf="checkstyle"/>
+  </target>
+
+  <target name="ivy-retrieve-jdiff" depends="ivy-resolve-jdiff"
+    description="Retrieve Ivy-managed artifacts for the jdiff configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="jdiff-classpath" conf="jdiff"/>
+  </target>
+
+  <target name="ivy-retrieve-javadoc" depends="ivy-resolve-javadoc"
+    description="Retrieve Ivy-managed artifacts for the javadoc configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="javadoc-classpath" conf="javadoc"/>
+  </target>
+
+  <target name="ivy-retrieve-test" depends="ivy-resolve-test"
+    description="Retrieve Ivy-managed artifacts for the test configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-test.classpath" conf="test"/>
+  </target>
+
+  <target name="ivy-retrieve-common" depends="ivy-resolve-common"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-common.classpath" conf="common"/>
+  </target>
+
+  <target name="ivy-retrieve-releaseaudit" depends="ivy-resolve-releaseaudit"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="releaseaudit-classpath" conf="releaseaudit"/>
+  </target>
+
+  <target name="ivy-report" depends="ivy-resolve-releaseaudit"
+    description="Generate">
+    <ivy:report todir="${build.ivy.report.dir}" settingsRef="${ant.project.name}.ivy.settings"/>
+    <echo>
+      Reports generated:${build.ivy.report.dir}
+    </echo>
+  </target>
+
+</project>
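
Taken together, a hedged sketch of the most common entry points into this build file; test class names are placeholders, and network access is assumed for the Ivy and Maven ant-task downloads:

    ant jar                                  # compile and package the hadoop-common jar
    ant test-core -Dtestcase=TestSomeClass   # run a single core unit test
    ant test                                 # run core, fault-injection, and contrib tests
    ant tar                                  # build the release tarball
    ant mvn-install                          # install the common and test jars into the local m2 repository
    ant eclipse                              # generate Eclipse project files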

+ 24 - 0
common/conf/configuration.xsl

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>

+ 8 - 0
common/conf/core-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 54 - 0
common/conf/hadoop-env.sh.template

@@ -0,0 +1,54 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+# export JAVA_HOME=/usr/lib/j2sdk1.6-sun
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HADOOP_CLASSPATH="<extra_entries>:$HADOOP_CLASSPATH"
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# if [ "$HADOOP_OPTS" == "" ]; then export HADOOP_OPTS=-server; else HADOOP_OPTS+=" -server"; fi
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+# export HADOOP_TASKTRACKER_OPTS=
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options.  Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# Where log files are stored.  $HADOOP_HOME/logs by default.
+# export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
+
+# File naming remote slave hosts.  $HADOOP_HOME/conf/slaves by default.
+# export HADOOP_SLAVES=${HADOOP_HOME}/conf/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+# export HADOOP_PID_DIR=/var/hadoop/pids
+
+# A string representing this instance of hadoop. $USER by default.
+# export HADOOP_IDENT_STRING=$USER
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HADOOP_NICENESS=10

+ 72 - 0
common/conf/hadoop-metrics.properties

@@ -0,0 +1,72 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+#jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.period=10
+# rpc.servers=localhost:8649
+
+
+# Configuration of the "ugi" context for null
+ugi.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "ugi" context for file
+#ugi.class=org.apache.hadoop.metrics.file.FileContext
+#ugi.period=10
+#ugi.fileName=/tmp/ugimetrics.log
+
+# Configuration of the "ugi" context for ganglia
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# ugi.period=10
+# ugi.servers=localhost:8649
+

+ 16 - 0
common/conf/hadoop-metrics2.properties.example

@@ -0,0 +1,16 @@
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+
+#namenode.sink.file.filename=namenode-metrics.out
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+#jobtracker.sink.file.filename=jobtracker-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out

+ 106 - 0
common/conf/hadoop-policy.xml.template

@@ -0,0 +1,106 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code 
+    via the DistributedFileSystem. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol 
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to 
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.tracker.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterTrackerProtocol, used by the tasktrackers to 
+    communicate with the jobtracker.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.submission.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for JobSubmissionProtocol, used by job clients to 
+    communicate with the jobtracker for job submission, querying job status, etc.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.task.umbilical.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce 
+    tasks to communicate with the parent tasktracker. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the 
+    dfsadmin and mradmin commands to refresh the security policy in-effect. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For example, "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AdminOperationsProtocol, used by the mradmin command
+    to refresh queues and nodes at JobTracker. The ACL is a comma-separated list of 
+    user and group names. The user and group list is separated by a blank. 
+    For example, "alice,bob users,wheel". A special value of "*" means all users are 
+    allowed.</description>
+  </property>
+</configuration>

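Every ACL above shares one value format: a comma-separated list of users, a blank, then a comma-separated list of groups, with "*" opening the protocol to everyone. After tightening a value (for example, restricting security.client.protocol.acl to "alice,bob users,wheel"), the descriptions note that the dfsadmin and mradmin commands can push the new policy to running daemons; a sketch, assuming service-level authorization is enabled in core-site.xml:

  # Sketch: reload conf/hadoop-policy.xml on the running daemons
  # (assumes hadoop.security.authorization=true in core-site.xml)
  bin/hadoop dfsadmin -refreshServiceAcl
  bin/hadoop mradmin -refreshServiceAcl
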
+ 149 - 0
common/conf/log4j.properties

@@ -0,0 +1,149 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+#
+# Job Summary Appender 
+#
+# Use the following logger to send job summaries to a separate file, defined by
+# hadoop.mapreduce.jobsummary.log.file and rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Roll over at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+#Security appender
+#
+hadoop.security.log.file=SecurityAuth.audit
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Security audit logger
+log4j.category.SecurityLogger=INFO,DRFAS
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# Audit events are emitted at INFO level; the WARN setting below suppresses them
+#
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.JSA.DatePattern=.yyyy-MM-dd
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# MapReduce Audit Log Appender
+#
+
+# Set the MapReduce audit log filename
+#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log
+
+# Appender for AuditLogger.
+# Requires the following system properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - hadoop.mapreduce.audit.log.file (MapReduce audit log filename)
+
+#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT
+#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file}
+#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd
+#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

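Most of the appenders above are selected through the hadoop.* logger properties rather than hard-wired, so a daemon can be switched to a different appender at start time without editing this file. A sketch, assuming the stock bin/ scripts translate HADOOP_ROOT_LOGGER into the -Dhadoop.root.logger system property (the daemon name is illustrative):

  # Sketch: log to the daily-rolling DRFA appender defined above instead of the console
  export HADOOP_ROOT_LOGGER="INFO,DRFA"
  bin/hadoop-daemon.sh start namenode
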
+ 1 - 0
common/conf/masters.template

@@ -0,0 +1 @@
+localhost

+ 1 - 0
common/conf/slaves.template

@@ -0,0 +1 @@
+localhost

+ 57 - 0
common/conf/ssl-client.xml.example

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

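Only the truststore and keystore locations above are mandatory; everything else has a default, and both stores default to the JKS type. A sketch of producing matching files with the JDK keytool (aliases, paths, and the certificate file name are illustrative):

  # Sketch: create a client keystore, then import the cluster's certificate into a
  # truststore; point ssl.client.keystore.location / ssl.client.truststore.location at them
  keytool -genkey -alias hadoop-client -keystore client-keystore.jks -storetype jks
  keytool -import -alias hadoop-server -file server.crt -keystore client-truststore.jks -storetype jks
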
+ 55 - 0
common/conf/ssl-server.xml.example

@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 261 - 0
common/ivy.xml

@@ -0,0 +1,261 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}" revision="${version}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Hadoop Common
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact"
+      extends="client,server,s3-server,kfs,mandatory,jetty,ftp"/>
+
+    <conf name="mandatory" description="contains the critical  dependencies"
+      extends="commons-logging,log4j"/>
+
+    <!--
+    These public configurations contain the core dependencies for running hadoop client or server.
+    The server is effectively a superset of the client.
+    -->
+    <conf name="client" description="client-side dependencies"
+      extends="mandatory,httpclient"/>
+    <conf name="server" description="server-side dependencies"
+      extends="client"/>
+    <conf name="s3-client" description="dependencies for working with S3/EC2 infrastructure"
+      extends="client"/>
+    <conf name="s3-server" description="dependencies for running on S3/EC2 infrastructure"
+      extends="s3-client,server"/>
+    <conf name="kfs" description="dependencies for KFS file system support"/>
+    <conf name="ftp" description="dependencies for workign with FTP filesytems" 
+              extends="mandatory"/>
+    <conf name="jetty" description="Jetty provides the in-VM HTTP daemon" extends="commons-logging"/>
+
+    <conf name="common" extends="runtime,mandatory,httpclient,ftp,jetty,jdiff"
+                        description="common artifacts"/>
+    <!--Testing pulls in everything-->
+   <conf name="test" extends="master" description="the classpath needed to run tests"/>
+
+    <!--Private configurations. -->
+
+    <conf name="javadoc" visibility="private" description="artiracts required while performing doc generation"
+      extends="common,mandatory,jetty,lucene"/>
+
+    <conf name="releaseaudit" visibility="private"
+	description="Artifacts required for releaseaudit target"/>
+     
+    <conf name="commons-logging" visibility="private"/>
+    <conf name="httpclient" visibility="private" extends="commons-logging"/>
+    <conf name="log4j" visibility="private"/>
+    <conf name="lucene" visibility="private"/>
+    <conf name="jdiff" visibility="private" extends="log4j,s3-client,jetty,server"/>
+    <conf name="checkstyle" visibility="private"/>
+
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+
+ <!--used client side-->
+    <dependency org="commons-cli"
+      name="commons-cli"
+      rev="${commons-cli.version}"
+      conf="client->default"/>
+    <dependency org="checkstyle"
+      name="checkstyle"
+      rev="${checkstyle.version}"
+      conf="checkstyle->default"/>
+    <dependency org="jdiff"
+      name="jdiff"
+      rev="${jdiff.version}"
+      conf="jdiff->default"/>
+
+    <dependency org="xmlenc"
+      name="xmlenc"
+      rev="${xmlenc.version}"
+      conf="server->default"/>
+
+    <!--Configuration: httpclient-->
+
+    <dependency org="commons-codec"
+      name="commons-codec"
+      rev="${commons-codec.version}"
+      conf="httpclient->default"/>
+
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="ftp->default"/>
+
+    <!--Configuration: Jetty -->
+    <dependency org="org.mortbay.jetty"
+      name="jetty"
+      rev="${jetty.version}"
+      conf="jetty->default">
+      <exclude module="ant"/>
+    </dependency>
+
+    <dependency org="tomcat"
+      name="jasper-runtime"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="tomcat"
+      name="jasper-compiler"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-2.1-jetty"
+      rev="${jetty.version}"
+      conf="jetty->default"/>
+    <dependency org="commons-el"
+      name="commons-el"
+      rev="${commons-el.version}"
+      conf="jetty->master"/>
+
+
+    <!--Configuration: commons-logging -->
+
+    <!--it is essential that only the master JAR of commons logging
+    is pulled in, as its dependencies are usually a mess, including things
+    like out of date servlet APIs, bits of Avalon, etc.
+    -->
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="commons-logging->master"/>
+
+
+    <!--Configuration: log4j -->
+
+    <!--log4J is not optional until commons-logging.properties is stripped out of the JAR -->
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="log4j->master"/>
+
+    <!--Configuration: s3-client -->
+    <!--there are two jets3t projects in the repository; this one goes up to 0.6 and
+    is assumed to be the live one-->
+    <dependency org="net.java.dev.jets3t"
+      name="jets3t"
+      rev="${jets3t.version}"
+      conf="s3-client->default"/>
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="s3-client->master"/> 
+    <dependency org="net.sf.kosmosfs"
+      name="kfs"
+      rev="${kfs.version}"
+      conf="kfs->default"/>
+
+    <!--Configuration: test -->
+    <!--artifacts needed for testing -->
+
+    <dependency org="junit"
+      name="junit"
+      rev="${junit.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.rat"
+      name="apache-rat-tasks"
+      rev="${rats-lib.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-lang"
+      name="commons-lang"
+      rev="${commons-lang.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-collections"
+      name="commons-collections"
+      rev="${commons-collections.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="hsqldb"
+      name="hsqldb"
+      rev="${hsqldb.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.lucene"
+      name="lucene-core"
+      rev="${lucene-core.version}"
+      conf="javadoc->default"/> 
+    <dependency org="commons-logging"
+      name="commons-logging-api"
+      rev="${commons-logging-api.version}"
+      conf="common->default"/>
+    <dependency org="org.slf4j"
+      name="slf4j-api"
+      rev="${slf4j-api.version}"
+      conf="common->default"/>
+    <dependency org="org.eclipse.jdt"
+      name="core"
+      rev="${core.version}"
+      conf="common->master"/>
+    <dependency org="org.slf4j"
+      name="slf4j-log4j12"
+      rev="${slf4j-log4j12.version}"
+      conf="common->master">
+    </dependency>
+    <dependency org="org.apache.avro"
+      name="avro"
+      rev="${avro.version}"
+      conf="common->default">
+      <exclude module="ant"/>
+      <exclude module="jetty"/>
+      <exclude module="slf4j-simple"/>
+      <exclude module="velocity"/>
+      <exclude module="netty" />
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjrt"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjtools"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.mockito" 
+      name="mockito-all" 
+      rev="${mockito-all.version}" 
+      conf="test->default">
+    </dependency> 
+    <dependency org="com.jcraft"
+      name="jsch"
+      rev="${jsch.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
+    <dependency org="com.google.guava"
+      name="guava"
+      rev="${guava.version}"
+      conf="common->default"/>
+  </dependencies>
+</ivy-module>

+ 42 - 0
common/ivy/hadoop-common-instrumented-template.xml

@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-instrumented</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>org.aspectj</groupId>
+      <artifactId>aspectjrt</artifactId>
+      <version>1.6.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.aspectj</groupId>
+      <artifactId>aspectjtools</artifactId>
+      <version>1.6.5</version>
+    </dependency>
+  </dependencies>
+</project>

+ 151 - 0
common/ivy/hadoop-common-template.xml

@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.2</version>
+    </dependency>
+    <dependency>
+      <groupId>xmlenc</groupId>
+      <artifactId>xmlenc</artifactId>
+      <version>0.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>1.1.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.6.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <version>1.6.1</version>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>1.2.16</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.26</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-runtime</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-compiler</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1-jetty</artifactId>
+      <version>6.1.26</version>
+      <exclusions>
+        <exclusion>
+          <groupId>ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>commons-el</groupId>
+      <artifactId>commons-el</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.java.dev.jets3t</groupId>
+      <artifactId>jets3t</artifactId>
+      <version>0.7.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.kosmosfs</groupId>
+      <artifactId>kfs</artifactId>
+      <version>0.3</version>
+    </dependency>
+    <dependency>
+      <groupId>hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>1.8.0.10</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.4.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.tools.ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.jboss.netty</groupId>
+          <artifactId>netty</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.velocity</groupId>
+          <artifactId>velocity</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>commons-configuration</groupId>
+      <artifactId>commons-configuration</artifactId>
+      <version>1.6</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math</artifactId>
+      <version>2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>r07</version>
+    </dependency>
+  </dependencies>
+</project>

+ 43 - 0
common/ivy/hadoop-common-test-template.xml

@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-test</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.8.5</version>
+    </dependency>
+  </dependencies>
+</project>

+ 50 - 0
common/ivy/ivysettings.xml

@@ -0,0 +1,50 @@
+<ivysettings>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+  <property name="repo.maven.org" value="http://repo1.maven.org/maven2/" override="false"/>
+
+  <property name="maven2.pattern" value="[organisation]/[module]/[revision]/[module]-[revision]"/>
+  <property name="repo.dir" value="${user.home}/.m2/repository"/>
+  <!-- pull in the local repository -->
+  <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
+
+  <property name="resolvers" value="default" override="false"/>
+  <property name="force-resolve" value="false" override="false"/>
+  <settings defaultResolver="${resolvers}"/>
+
+  <resolvers>
+    <!--ibiblio resolvers-->
+    <ibiblio name="maven2" root="${repo.maven.org}" m2compatible="true"/>
+
+    <filesystem name="fs" m2compatible="true" force="${force-resolve}">
+       <artifact pattern="${repo.dir}/${maven2.pattern}.[ext]"/>
+       <ivy pattern="${repo.dir}/${maven2.pattern}.pom"/>
+    </filesystem>
+
+    <chain name="default" dual="true">
+      <resolver ref="maven2"/>
+    </chain>
+
+    <chain name="internal" dual="true">
+      <resolver ref="fs"/>
+      <resolver ref="maven2"/>
+    </chain>
+
+  </resolvers>
+
+</ivysettings>

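The two chains above encode two strategies: "default" resolves straight from the central Maven repository, while "internal" first checks the local ~/.m2 repository named by repo.dir. Because the active chain comes from the overridable resolvers property, a build can prefer locally installed artifacts without editing this file; a sketch (the target name is illustrative):

  # Sketch: resolve against the local ~/.m2 repository before falling back to central
  ant -Dresolvers=internal jar
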
+ 62 - 0
common/ivy/libraries.properties

@@ -0,0 +1,62 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by hadoop and components.
+#It drives ivy and the generation of a maven POM
+
+#These are the versions of our dependencies (in alphabetical order)
+ant-task.version=2.0.10
+aspectj.version=1.6.5
+avro.version=1.4.1
+
+checkstyle.version=4.2
+commons-cli.version=1.2
+commons-codec.version=1.4
+commons-collections.version=3.1
+commons-configuration.version=1.6
+commons-lang.version=2.5
+commons-logging.version=1.1.1
+commons-logging-api.version=1.1
+commons-el.version=1.0
+commons-fileupload.version=1.2
+commons-io.version=1.4
+commons-math.version=2.1
+commons-net.version=1.4.1
+core.version=3.1.1
+
+guava.version=r07
+
+hsqldb.version=1.8.0.10
+
+ivy.version=2.2.0
+
+jasper.version=5.5.12
+jets3t.version=0.7.1
+jetty.version=6.1.26
+junit.version=4.8.1
+jdiff.version=1.0.9
+jsch.version=0.1.42
+
+kfs.version=0.3
+
+log4j.version=1.2.16
+lucene-core.version=2.3.1
+
+mockito-all.version=1.8.5
+
+rats-lib.version=0.6
+
+slf4j-api.version=1.6.1
+slf4j-log4j12.version=1.6.1
+
+xmlenc.version=0.52
+xerces.version=1.4.4

Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop-core_0.20.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop-core_0.21.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.17.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.2.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.18.3.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.19.2.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.0.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.1.xml


Changes are not shown because the file is too large.
+ 11 - 0
common/lib/jdiff/hadoop_0.20.2.xml


+ 11 - 0
common/src/contrib/bash-tab-completion/README

@@ -0,0 +1,11 @@
+Bash tab completion support for the hadoop script.
+
+On Debian-like distributions, the script can be placed in
+/etc/bash_completion.d/, and it will be sourced automatically by Bash. On
+other distributions, you may source the file manually (`. hadoop.sh') or
+source it from your bashrc (or equivalent) file.
+
+The script allows tab completion of all the command names, subcommands for the
+'fs', 'dfsadmin', 'job', 'namenode' and 'pipes' commands, arguments of the 'jar'
+command and most arguments to the 'fs' subcommands (completing local and 
+dfs paths as appropriate).

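A quick way to check that completion is working, assuming the hadoop script is on the PATH:

  # Sketch: load the completion function into the current shell, then try it
  . /etc/bash_completion.d/hadoop.sh
  # typing:  hadoop fs -copy<TAB>
  # should offer completions such as -copyFromLocal and -copyToLocal
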
+ 121 - 0
common/src/contrib/bash-tab-completion/hadoop.sh

@@ -0,0 +1,121 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Provides tab completion for the main hadoop script.
+#
+# On debian-based systems, place in /etc/bash_completion.d/ and either restart
+# Bash or source the script manually (. /etc/bash_completion.d/hadoop.sh).
+
+_hadoop() {
+  local script cur prev temp
+
+  COMPREPLY=()
+  cur=${COMP_WORDS[COMP_CWORD]}
+  prev=${COMP_WORDS[COMP_CWORD-1]}  
+  script=${COMP_WORDS[0]}  
+  
+  # Bash lets you tab complete things even if the script doesn't
+  # exist (or isn't executable). Check to make sure it is, as we
+  # need to execute it to get options/info
+  if [ -f "$script" -a -x "$script" ]; then
+    case $COMP_CWORD in
+    1)
+      # Completing the first argument (the command).
+
+      temp=`$script | grep -n "^\s*or"`;
+      temp=`$script | head -n $((${temp%%:*} - 1)) | awk '/^ / {print $1}' | sort | uniq`;
+      COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+      return 0;;
+
+    2)
+      # Completing the second arg (first arg to the command)
+
+      # The output of commands isn't hugely consistent, so certain
+      # names are hardcoded and parsed differently. Some aren't
+      # handled at all (mostly ones without args).
+      case ${COMP_WORDS[1]} in
+      dfs | dfsadmin | fs | job | pipes)
+        # One option per line, enclosed in square brackets
+
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | awk '/^[ \t]*\[/ {gsub("[[\\]]", ""); print $1}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      jar)
+        # Any (jar) file
+
+        COMPREPLY=(`compgen -A file -- ${cur}`);
+        return 0;;
+
+      namenode)
+        # All options specified in one line,
+        # enclosed in [] and separated with |
+        temp=`$script ${COMP_WORDS[1]} -help 2>&1 | grep Usage: | cut -d '[' -f 2- | awk '{gsub("] \\| \\[|]", " "); print $0}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      *)
+        # Other commands - no idea
+
+        return 1;;
+      esac;;
+
+    *)
+      # Additional args
+      
+      case ${COMP_WORDS[1]} in
+      dfs | fs)
+        # DFS/FS subcommand completion
+        # Pull the list of options, grep for the one the user is trying to use,
+        # and then select the description of the relevant argument
+        temp=$((${COMP_CWORD} - 1));
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | grep -- "${COMP_WORDS[2]} " | awk '{gsub("[[ \\]]", ""); print $0}' | cut -d '<' -f ${temp}`;
+
+        if [ ${#temp} -lt 1 ]; then
+          # No match
+          return 1;
+        fi;
+
+        temp=${temp:0:$((${#temp} - 1))};
+
+        # Now do completion based on the argument
+        case $temp in
+        path | src | dst)
+          # DFS path completion
+          temp=`$script ${COMP_WORDS[1]} -ls "${cur}*" 2>&1 | grep -vE '^Found ' | cut -f 1 | awk '{gsub("^.* ", ""); print $0;}'`
+          COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+          return 0;;
+
+        localsrc | localdst)
+          # Local path completion
+          COMPREPLY=(`compgen -A file -- ${cur}`);
+          return 0;;
+
+        *)
+          # Other arguments - no idea
+          return 1;;
+        esac;;
+
+      *)
+        # Other subcommands - no idea
+        return 1;;
+      esac;
+    esac;
+  fi;
+}
+
+complete -F _hadoop hadoop

+ 305 - 0
common/src/contrib/build-contrib.xml

@@ -0,0 +1,305 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- Imported by contrib/*/build.xml files to share generic targets. -->
+
+<project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
+
+  <property name="name" value="${ant.project.name}"/>
+  <dirname property="src.contrib.dir" file="${ant.file.hadoopbuildcontrib}" />
+  <property name="root" value="${basedir}"/>
+
+  <!-- Load all the default properties, and any the user wants    -->
+  <!-- to contribute (without having to type -D or edit this file -->
+  <property file="${user.home}/${name}.build.properties" />
+  <property file="${root}/build.properties" />
+
+  <property name="hadoop.root" location="${src.contrib.dir}/../../"/>
+  <property name="src.dir"  location="${root}/src/java"/>
+  <property name="src.test" location="${root}/src/test"/>
+  <property name="src.examples" location="${root}/src/examples"/>
+
+  <available file="${src.examples}" type="dir" property="examples.available"/>
+  <available file="${src.test}" type="dir" property="test.available"/>
+
+  <property name="conf.dir" location="${hadoop.root}/conf"/>
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="test.output" value="no"/>
+  <property name="test.timeout" value="900000"/>
+  <property name="build.dir" location="${hadoop.root}/build/contrib/${name}"/>
+  <property name="build.classes" location="${build.dir}/classes"/>
+  <property name="build.test" location="${build.dir}/test"/>
+  <property name="build.examples" location="${build.dir}/examples"/>
+  <property name="hadoop.log.dir" location="${build.dir}/test/logs"/>
+  <!-- all jars together -->
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.debug" value="on"/>
+  <property name="build.ivy.lib.dir" value="${hadoop.root}/build/ivy/lib"/> 
+
+  <property name="javadoc.link"
+            value="http://java.sun.com/j2se/1.4/docs/api/"/>
+
+  <property name="build.encoding" value="ISO-8859-1"/>
+
+  <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
+
+
+   <!-- IVY properties set here -->
+  <property name="ivy.dir" location="ivy" />
+  <property name="ivysettings.xml" location="${hadoop.root}/ivy/ivysettings.xml"/>
+  <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <loadproperties srcfile="${hadoop.root}/ivy/libraries.properties"/>
+  <property name="ivy.jar" location="${hadoop.root}/ivy/ivy-${ivy.version}.jar"/>
+  <property name="ivy_repo_url" 
+	value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar" />
+  <property name="build.dir" location="build" />
+  <property name="build.ivy.dir" location="${build.dir}/ivy" />
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
+  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/> 
+
+  <!--this is the naming policy for artifacts we want pulled down-->
+  <property name="ivy.artifact.retrieve.pattern"
+    			value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+
+  <!-- the normal classpath -->
+  <path id="contrib-classpath">
+    <pathelement location="${build.classes}"/>
+    <fileset refid="lib.jars"/>
+    <pathelement location="${hadoop.root}/build/classes"/>
+    <fileset dir="${hadoop.root}/lib">
+      <include name="**/*.jar" />
+    </fileset>
+    <path refid="${ant.project.name}.common-classpath"/>
+    <pathelement path="${clover.jar}"/>
+  </path>
+
+  <!-- the unit test classpath -->
+  <path id="test.classpath">
+    <pathelement location="${build.test}" />
+    <pathelement location="${hadoop.root}/build/test/classes"/>
+    <pathelement location="${hadoop.root}/build/test/core/classes"/>
+    <pathelement location="${hadoop.root}/build/test/hdfs/classes"/>
+    <pathelement location="${hadoop.root}/build/test/mapred/classes"/>
+    <pathelement location="${hadoop.root}/src/contrib/test"/>
+    <pathelement location="${conf.dir}"/>
+    <pathelement location="${hadoop.root}/build"/>
+    <pathelement location="${build.examples}"/>
+    <path refid="contrib-classpath"/>
+  </path>
+
+
+  <!-- to be overridden by sub-projects -->
+  <target name="check-contrib"/>
+  <target name="init-contrib"/>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init" depends="check-contrib" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.test}"/>
+    <mkdir dir="${build.examples}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <antcall target="init-contrib"/>
+  </target>
+
+
+  <!-- ====================================================== -->
+  <!-- Compile a Hadoop contrib's files                       -->
+  <!-- ====================================================== -->
+  <target name="compile" depends="init, ivy-retrieve-common" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.dir}"
+     includes="**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     deprecation="${javac.deprecation}">
+     <classpath refid="contrib-classpath"/>
+    </javac>
+  </target>
+
+
+  <!-- ======================================================= -->
+  <!-- Compile a Hadoop contrib's example files (if available) -->
+  <!-- ======================================================= -->
+  <target name="compile-examples" depends="compile" if="examples.available">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.examples}"
+     includes="**/*.java"
+     destdir="${build.examples}"
+     debug="${javac.debug}">
+     <classpath refid="contrib-classpath"/>
+    </javac>
+  </target>
+
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  -->
+  <!-- ================================================================== -->
+  <target name="compile-test" depends="compile-examples" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <javac
+     encoding="${build.encoding}"
+     srcdir="${src.test}"
+     includes="**/*.java"
+     destdir="${build.test}"
+     debug="${javac.debug}">
+    <classpath refid="test.classpath"/>
+    </javac>
+  </target>
+  
+
+  <!-- ====================================================== -->
+  <!-- Make a Hadoop contrib's jar                            -->
+  <!-- ====================================================== -->
+  <target name="jar" depends="compile" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <jar
+      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
+      basedir="${build.classes}"      
+    />
+  </target>
+
+  
+  <!-- ====================================================== -->
+  <!-- Make a Hadoop contrib's examples jar                   -->
+  <!-- ====================================================== -->
+  <target name="jar-examples" depends="compile-examples"
+          if="examples.available" unless="skip.contrib">
+    <echo message="contrib: ${name}"/>
+    <jar jarfile="${build.dir}/hadoop-${version}-${name}-examples.jar">
+      <fileset dir="${build.classes}">
+      </fileset>
+      <fileset dir="${build.examples}">
+      </fileset>
+    </jar>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Package a Hadoop contrib                               -->
+  <!-- ====================================================== -->
+  <target name="package" depends="jar, jar-examples" unless="skip.contrib"> 
+    <mkdir dir="${dist.dir}/contrib/${name}"/>
+    <copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${build.dir}">
+        <include name="hadoop-${version}-${name}.jar" />
+      </fileset>
+    </copy>
+  </target>
+  
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     -->
+  <!-- ================================================================== -->
+  <target name="test" depends="compile-test, compile" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${hadoop.log.dir}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <junit
+      printsummary="yes" showoutput="${test.output}" 
+      haltonfailure="no" fork="yes" maxmemory="256m"
+      errorProperty="tests.failed" failureProperty="tests.failed"
+      timeout="${test.timeout}">
+      
+      <sysproperty key="test.build.data" value="${build.test}/data"/>
+      <sysproperty key="build.test" value="${build.test}"/>
+      <sysproperty key="contrib.name" value="${name}"/>
+      
+      <!-- requires fork=yes for: 
+        relative File paths to use the specified user.dir 
+        classpath to use build/contrib/*.jar
+      -->
+      <sysproperty key="user.dir" value="${build.test}/data"/>
+      
+      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
+      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
+      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/> 
+      <sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
+      <sysproperty key="taskcontroller-user" value="${taskcontroller-user}"/>
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+    <fail if="tests.failed">Tests failed!</fail>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <target name="clean">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${build.dir}"/>
+  </target>
+
+  <target name="ivy-probe-antlib" >
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+    </condition>
+  </target>
+
+
+  <target name="ivy-download" description="To download ivy " unless="offline">
+    <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="ivy-init-antlib" depends="ivy-download,ivy-probe-antlib" unless="ivy.found">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
+      loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <fail >
+      <condition >
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy_repo_url}
+    </fail>
+  </target>
+
+  <target name="ivy-init" depends="ivy-init-antlib">
+    <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}"/>
+  </target>
+
+  <target name="ivy-resolve-common" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common" />
+  </target>
+
+  <target name="ivy-retrieve-common" depends="ivy-resolve-common"
+    description="Retrieve Ivy-managed artifacts for the compile/test configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings" 
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}" sync="true" />
+    <ivy:cachepath pathid="${ant.project.name}.common-classpath" conf="common" />
+  </target>
+</project>

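The test target above honours a testcase property (the second batchtest element), which limits a run to a single class; test.timeout and test.output tune the JUnit run in the same way. A sketch of running one test in one contrib module (module and class names are illustrative):

  # Sketch: each contrib build.xml imports build-contrib.xml, so the "test"
  # target above is available from the module directory
  cd common/src/contrib/failmon
  ant test -Dtestcase=TestSomething
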
+ 64 - 0
common/src/contrib/build.xml

@@ -0,0 +1,64 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="hadoopcontrib" default="compile" basedir=".">
+  
+  <!-- If one of the contrib subdirectories breaks the build or test -->
+  <!-- targets and you cannot fix it, exclude it from the fileset below, -->
+  <!-- e.g. excludes="badcontrib/build.xml" -->
+
+  <!-- ====================================================== -->
+  <!-- Compile contribs.                                      -->
+  <!-- ====================================================== -->
+  <target name="compile">
+    <subant target="compile">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Package contrib jars.                                  -->
+  <!-- ====================================================== -->
+  <target name="package">
+    <subant target="package">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+  
+  <!-- ====================================================== -->
+  <!-- Test all the contribs.                               -->
+  <!-- ====================================================== -->
+  <target name="test">
+    <subant target="test">
+      <fileset dir="." includes="failmon/build.xml"/>
+      <fileset dir="." includes="hod/build.xml"/>
+    </subant>
+  </target>
+  
+  
+  <!-- ====================================================== -->
+  <!-- Clean all the contribs.                              -->
+  <!-- ====================================================== -->
+  <target name="clean">
+    <subant target="clean">
+      <fileset dir="." includes="*/build.xml"/>
+    </subant>
+  </target>
+
+</project>

+ 15 - 0
common/src/contrib/ec2/README.txt

@@ -0,0 +1,15 @@
+Hadoop EC2
+
+NOTE: these scripts have been deprecated. See http://incubator.apache.org/whirr.
+
+This collection of scripts allows you to run Hadoop clusters on Amazon.com's Elastic Compute Cloud (EC2) service described at:
+
+  http://aws.amazon.com/ec2
+  
+To get help, type the following in a shell:
+  
+  bin/hadoop-ec2
+
+For full instructions, please visit the Hadoop wiki at:
+
+  http://wiki.apache.org/hadoop/AmazonEC2#AutomatedScripts

+ 71 - 0
common/src/contrib/ec2/bin/cmd-hadoop-cluster

@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Run commands on master or specified node of a running Hadoop EC2 cluster.
+
+set -o errexit
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Command required!"
+  exit 1
+fi
+
+# get arguments
+COMMAND="$1"
+shift
+# get group
+CLUSTER="$1"
+shift
+
+if [ -z $CLUSTER ]; then
+  echo "Cluster name or instance id required!"
+  exit -1
+fi
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [[ $CLUSTER == i-* ]]; then
+  HOST=`ec2-describe-instances $CLUSTER | grep running | awk '{print $4}'`
+  [ -z $HOST ] && echo "Instance still pending or no longer running: $CLUSTER" && exit -1
+else
+  [ ! -f $MASTER_IP_PATH ] && echo "Wrong group name, or cluster not launched! $CLUSTER" && exit -1
+  HOST=`cat $MASTER_IP_PATH`
+fi
+
+if [ "$COMMAND" = "login" ] ; then
+  echo "Logging in to host $HOST."
+  ssh $SSH_OPTS "root@$HOST"
+elif [ "$COMMAND" = "proxy" ] ; then
+  echo "Proxying to host $HOST via local port 6666"
+  echo "Gangia:     http://$HOST/ganglia"
+  echo "JobTracker: http://$HOST:50030/"
+  echo "NameNode:   http://$HOST:50070/"
+  ssh $SSH_OPTS -D 6666 -N "root@$HOST"
+elif [ "$COMMAND" = "push" ] ; then
+  echo "Pushing $1 to host $HOST."
+  scp $SSH_OPTS -r $1 "root@$HOST:"
+elif [ "$COMMAND" = "screen" ] ; then
+  echo "Logging in and attaching screen on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" 'screen -D -R'
+else
+  echo "Executing command on host $HOST."
+  ssh $SSH_OPTS -t "root@$HOST" "$COMMAND"
+fi

+ 80 - 0
common/src/contrib/ec2/bin/create-hadoop-image

@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Create a Hadoop AMI.
+# Inspired by Jonathan Siegel's EC2 script (http://blogsiegel.blogspot.com/2006/08/sandboxing-amazon-ec2.html)
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+[ ! -z $AMI_IMAGE ] && echo "AMI already registered, use: ec2-deregister $AMI_IMAGE" && exit -1
+
+echo "Starting a AMI with ID $BASE_AMI_IMAGE."
+OUTPUT=`ec2-run-instances $BASE_AMI_IMAGE -k $KEY_NAME -t $INSTANCE_TYPE`
+BOOTING_INSTANCE=`echo $OUTPUT | awk '{print $6}'`
+
+echo "Instance is $BOOTING_INSTANCE."
+
+echo "Polling server status (ec2-describe-instances $BOOTING_INSTANCE)"
+while true; do
+  printf "."
+  HOSTNAME=`ec2-describe-instances $BOOTING_INSTANCE | grep running | awk '{print $4}'`
+  if [ ! -z $HOSTNAME ]; then
+    break;
+  fi
+  sleep 1
+done
+
+echo "The server is available at $HOSTNAME."
+while true; do
+  REPLY=`ssh $SSH_OPTS "root@$HOSTNAME" 'echo "hello"'`
+  if [ ! -z $REPLY ]; then
+   break;
+  fi
+  sleep 5
+done
+
+#read -p "Login first? [yes or no]: " answer
+
+if [ "$answer" == "yes" ]; then
+  ssh $SSH_OPTS "root@$HOSTNAME"
+fi
+
+echo "Copying scripts."
+
+# Copy setup scripts
+scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS "$bin"/image/ec2-run-user-data "root@$HOSTNAME:/etc/init.d"
+
+# Copy private key and certificate (for bundling image)
+scp $SSH_OPTS $EC2_KEYDIR/pk*.pem "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS $EC2_KEYDIR/cert*.pem "root@$HOSTNAME:/mnt"
+
+# Connect to it
+ssh $SSH_OPTS "root@$HOSTNAME" '/mnt/create-hadoop-image-remote'
+
+# Register image
+ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml
+
+echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE"

+ 60 - 0
common/src/contrib/ec2/bin/delete-hadoop-cluster

@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Delete the groups and local files associated with a cluster.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Finding Hadoop clusters
+CLUSTERS=`ec2-describe-instances | \
+  awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | \
+  grep "$CLUSTER" | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
+  
+if [ -n "$CLUSTERS" ]; then
+  echo "Cluster $CLUSTER has running instances. Please terminate them first."
+  exit 0
+fi
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+rm -f $MASTER_IP_PATH
+rm -f $MASTER_PRIVATE_IP_PATH
+
+if ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
+  if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+    echo "Revoking authorization between $CLUSTER_MASTER and $CLUSTER"
+    ec2-revoke $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID || true
+    ec2-revoke $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID || true
+  fi
+  echo "Deleting group $CLUSTER_MASTER"
+  ec2-delete-group $CLUSTER_MASTER
+fi
+
+if ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+  echo "Deleting group $CLUSTER"
+  ec2-delete-group $CLUSTER
+fi

+ 65 - 0
common/src/contrib/ec2/bin/hadoop-ec2

@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+echo "DEPRECATED. See http://incubator.apache.org/whirr." >&2
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Usage: hadoop-ec2 COMMAND"
+  echo "where COMMAND is one of:"
+  echo "  list                                 list all running Hadoop EC2 clusters"
+  echo "  launch-cluster <group> <num slaves>  launch a cluster of Hadoop EC2 instances - launch-master then launch-slaves"
+  echo "  launch-master  <group>               launch or find a cluster master"
+  echo "  launch-slaves  <group> <num slaves>  launch the cluster slaves"
+  echo "  terminate-cluster  <group>           terminate all Hadoop EC2 instances"
+  echo "  delete-cluster <group>               delete the group information for a terminated cluster"
+  echo "  login  <group|instance id>           login to the master node of the Hadoop EC2 cluster"
+  echo "  screen <group|instance id>           start or attach 'screen' on the master node of the Hadoop EC2 cluster"
+  echo "  proxy  <group|instance id>           start a socks proxy on localhost:6666 (use w/foxyproxy)"
+  echo "  push   <group> <file>                scp a file to the master node of the Hadoop EC2 cluster"
+  echo "  <shell cmd> <group|instance id>      execute any command remotely on the master"
+  echo "  create-image                         create a Hadoop AMI"
+  exit 1
+fi
+
+# get arguments
+COMMAND="$1"
+shift
+
+if [ "$COMMAND" = "create-image" ] ; then
+  . "$bin"/create-hadoop-image $*
+elif [ "$COMMAND" = "launch-cluster" ] ; then
+  . "$bin"/launch-hadoop-cluster $*
+elif [ "$COMMAND" = "launch-master" ] ; then
+  . "$bin"/launch-hadoop-master $*
+elif [ "$COMMAND" = "launch-slaves" ] ; then
+  . "$bin"/launch-hadoop-slaves $*
+elif [ "$COMMAND" = "delete-cluster" ] ; then
+  . "$bin"/delete-hadoop-cluster $*
+elif [ "$COMMAND" = "terminate-cluster" ] ; then
+  . "$bin"/terminate-hadoop-cluster $*
+elif [ "$COMMAND" = "list" ] ; then
+  . "$bin"/list-hadoop-clusters
+else
+  . "$bin"/cmd-hadoop-cluster "$COMMAND" $*
+fi
+

+ 93 - 0
common/src/contrib/ec2/bin/hadoop-ec2-env.sh.template

@@ -0,0 +1,93 @@
+# Set environment variables for running Hadoop on Amazon EC2 here. All are required.
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Your Amazon Account Number.
+AWS_ACCOUNT_ID=
+
+# Your Amazon AWS access key.
+AWS_ACCESS_KEY_ID=
+
+# Your Amazon AWS secret access key.
+AWS_SECRET_ACCESS_KEY=
+
+# Location of EC2 keys.
+# The default setting is probably OK if you set up EC2 following the Amazon Getting Started guide.
+EC2_KEYDIR=`dirname "$EC2_PRIVATE_KEY"`
+
+# The EC2 key name used to launch instances.
+# The default is the value used in the Amazon Getting Started guide.
+KEY_NAME=gsg-keypair
+
+# Where your EC2 private key is stored (created when following the Amazon Getting Started guide).
+# You need to change this if you don't store this with your other EC2 keys.
+PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"`
+
+# SSH options used when connecting to EC2 instances.
+SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no -o ServerAliveInterval=30`
+
+# The version of Hadoop to use.
+HADOOP_VERSION=0.19.0
+
+# The Amazon S3 bucket where the Hadoop AMI is stored.
+# The default value is for public images, so it can be left as-is if you are running a public image.
+# Change this value only if you are creating your own (private) AMI
+# so you can store it in a bucket you own.
+S3_BUCKET=hadoop-images
+
+# Enable public access to JobTracker and TaskTracker web interfaces
+ENABLE_WEB_PORTS=true
+
+# The script to run on instance boot.
+USER_DATA_FILE=hadoop-ec2-init-remote.sh
+
+# The EC2 instance type: m1.small, m1.large, m1.xlarge
+INSTANCE_TYPE="m1.small"
+#INSTANCE_TYPE="m1.large"
+#INSTANCE_TYPE="m1.xlarge"
+#INSTANCE_TYPE="c1.medium"
+#INSTANCE_TYPE="c1.xlarge"
+
+# The EC2 group master name. CLUSTER is set by calling scripts
+CLUSTER_MASTER=$CLUSTER-master
+
+# Cached values for a given cluster
+MASTER_PRIVATE_IP_PATH=~/.hadoop-private-$CLUSTER_MASTER
+MASTER_IP_PATH=~/.hadoop-$CLUSTER_MASTER
+MASTER_ZONE_PATH=~/.hadoop-zone-$CLUSTER_MASTER
+
+#
+# The following variables are only used when creating an AMI.
+#
+
+# The version number of the installed JDK.
+JAVA_VERSION=1.6.0_07
+
+# SUPPORTED_ARCHITECTURES = ['i386', 'x86_64']
+# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index.jsp and get the URL for the "Linux self-extracting file".
+if [ "$INSTANCE_TYPE" == "m1.small" -o "$INSTANCE_TYPE" == "c1.medium" ]; then
+  ARCH='i386'
+  BASE_AMI_IMAGE="ami-2b5fba42"  # ec2-public-images/fedora-8-i386-base-v1.07.manifest.xml
+  JAVA_BINARY_URL=''
+else
+  ARCH='x86_64'
+  BASE_AMI_IMAGE="ami-2a5fba43"  # ec2-public-images/fedora-8-x86_64-base-v1.07.manifest.xml
+  JAVA_BINARY_URL=''
+fi
+
+if [ "$AMI_KERNEL" != "" ]; then
+  KERNEL_ARG="--kernel ${AMI_KERNEL}"
+fi

+ 171 - 0
common/src/contrib/ec2/bin/hadoop-ec2-init-remote.sh

@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+
+################################################################################
+# Script that is run on each EC2 instance on boot. It is passed in the EC2 user
+# data, so should not exceed 16K in size.
+################################################################################
+
+################################################################################
+# Initialize variables
+################################################################################
+
+# Slaves are started after the master, and are told its address by sending a
+# modified copy of this file which sets the MASTER_HOST variable. 
+# A node  knows if it is the master or not by inspecting the security group
+# name. If it is the master then it retrieves its address using instance data.
+MASTER_HOST=%MASTER_HOST% # Interpolated before being sent to EC2 node
+SECURITY_GROUPS=`wget -q -O - http://169.254.169.254/latest/meta-data/security-groups`
+IS_MASTER=`echo $SECURITY_GROUPS | awk '{ a = match ($0, "-master$"); if (a) print "true"; else print "false"; }'`
+if [ "$IS_MASTER" == "true" ]; then
+ # use public hostnames for master. private hostnames can be used by substituting:
+ # MASTER_HOST=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname`
+ MASTER_HOST=`wget -q -O - 'http://169.254.169.254/latest/meta-data/public-hostname'`
+fi
+
+HADOOP_HOME=`ls -d /usr/local/hadoop-*`
+
+################################################################################
+# Hadoop configuration
+# Modify this section to customize your Hadoop cluster.
+################################################################################
+
+cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>hadoop.tmp.dir</name>
+  <value>/mnt/hadoop</value>
+</property>
+
+<property>
+  <name>fs.default.name</name>
+  <value>hdfs://$MASTER_HOST:50001</value>
+</property>
+
+<property>
+  <name>mapred.job.tracker</name>
+  <value>hdfs://$MASTER_HOST:50002</value>
+</property>
+
+<property>
+  <name>tasktracker.http.threads</name>
+  <value>80</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.map.tasks.maximum</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>mapred.tasktracker.reduce.tasks.maximum</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>mapred.output.compress</name>
+  <value>true</value>
+</property>
+
+<property>
+  <name>mapred.output.compression.type</name>
+  <value>BLOCK</value>
+</property>
+
+<property>
+  <name>dfs.client.block.write.retries</name>
+  <value>3</value>
+</property>
+
+<property>
+  <name>hadoop.rpc.socket.factory.class.default</name>
+  <value>org.apache.hadoop.net.StandardSocketFactory</value>
+  <final>true</final>
+</property>
+
+</configuration>
+EOF
+
+# Configure Hadoop for Ganglia
+# overwrite hadoop-metrics.properties
+cat > $HADOOP_HOME/conf/hadoop-metrics.properties <<EOF
+
+# Ganglia
+# we push to the master gmond so hostnames show up properly
+dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+dfs.period=10
+dfs.servers=$MASTER_HOST:8649
+
+mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+mapred.period=10
+mapred.servers=$MASTER_HOST:8649
+
+jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+jvm.period=10
+jvm.servers=$MASTER_HOST:8649
+EOF
+
+################################################################################
+# Start services
+################################################################################
+
+[ ! -f /etc/hosts ] &&  echo "127.0.0.1 localhost" > /etc/hosts
+
+mkdir -p /mnt/hadoop/logs
+
+# not set on boot
+export USER="root"
+
+if [ "$IS_MASTER" == "true" ]; then
+  # MASTER
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\( *mute *=.*\)|  mute = yes|" \
+         -e "s|\( *location *=.*\)|  location = \"master-node\"|" \
+         /etc/gmond.conf
+  mkdir -p /mnt/ganglia/rrds
+  chown -R ganglia:ganglia /mnt/ganglia/rrds
+  rm -rf /var/lib/ganglia; cd /var/lib; ln -s /mnt/ganglia ganglia; cd
+  service gmond start
+  service gmetad start
+  apachectl start
+
+  # Hadoop
+  # only format on first boot
+  [ ! -e /mnt/hadoop/dfs ] && "$HADOOP_HOME"/bin/hadoop namenode -format
+
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start namenode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start jobtracker
+else
+  # SLAVE
+  # Prep Ganglia
+  sed -i -e "s|\( *mcast_join *=.*\)|#\1|" \
+         -e "s|\( *bind *=.*\)|#\1|" \
+         -e "s|\(udp_send_channel {\)|\1\n  host=$MASTER_HOST|" \
+         /etc/gmond.conf
+  service gmond start
+
+  # Hadoop
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start datanode
+  "$HADOOP_HOME"/bin/hadoop-daemon.sh start tasktracker
+fi
+
+# Run this script on next boot
+rm -f /var/ec2/ec2-run-user-data.*

+ 80 - 0
common/src/contrib/ec2/bin/image/create-hadoop-image-remote

@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Create a Hadoop AMI. Runs on the EC2 instance.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Remove environment script since it contains sensitive information
+rm -f "$bin"/hadoop-ec2-env.sh
+
+# Install Java
+echo "Downloading and installing java binary."
+cd /usr/local
+wget -nv -O java.bin $JAVA_BINARY_URL
+sh java.bin
+rm -f java.bin
+
+# Install tools
+echo "Installing rpms."
+yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php
+yum -y clean all
+
+# Install Hadoop
+echo "Installing Hadoop $HADOOP_VERSION."
+cd /usr/local
+wget -nv http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+[ ! -f hadoop-$HADOOP_VERSION.tar.gz ] && wget -nv http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
+tar xzf hadoop-$HADOOP_VERSION.tar.gz
+rm -f hadoop-$HADOOP_VERSION.tar.gz
+
+# Configure Hadoop
+sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
+       -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
+       -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
+       -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
+      /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
+
+# Run user data as script on instance startup
+chmod +x /etc/init.d/ec2-run-user-data
+echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local
+
+# Setup root user bash environment
+echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
+echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
+echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile
+
+# Configure networking.
+# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
+rm -f /root/.ssh/authorized_keys
+# Ensure logging in to new hosts is seamless.
+echo '    StrictHostKeyChecking no' >> /etc/ssh/ssh_config
+
+# Bundle and upload image
+cd ~root
+# Don't need to delete .bash_history since it isn't written until exit.
+df -h
+ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem -u $AWS_ACCOUNT_ID -s 3072 -p hadoop-$HADOOP_VERSION-$ARCH -r $ARCH
+
+ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY
+
+# End
+echo Done

+ 63 - 0
common/src/contrib/ec2/bin/image/ec2-run-user-data

@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+# ec2-run-user-data - Run instance user-data if it looks like a script.
+#
+# Only retrieves and runs the user-data script once per instance.  If
+# you want the user-data script to run again (e.g., on the next boot)
+# then add this command in the user-data script:
+#   rm -f /var/ec2/ec2-run-user-data.*
+#
+# History:
+#   2008-05-16 Eric Hammond <ehammond@thinksome.com>
+#   - Initial version including code from Kim Scheibel, Jorge Oliveira
+#   2008-08-06 Tom White
+#   - Updated to use mktemp on fedora
+#
+
+prog=$(basename $0)
+logger="logger -t $prog"
+curl="curl --retry 3 --silent --show-error --fail"
+instance_data_url=http://169.254.169.254/2008-02-01
+
+# Wait until networking is up on the EC2 instance.
+perl -MIO::Socket::INET -e '
+ until(new IO::Socket::INET("169.254.169.254:80")){print"Waiting for network...\n";sleep 1}
+' | $logger
+
+# Exit if we have already run on this instance (e.g., previous boot).
+ami_id=$($curl $instance_data_url/meta-data/ami-id)
+been_run_file=/var/ec2/$prog.$ami_id
+mkdir -p $(dirname $been_run_file)
+if [ -f $been_run_file ]; then
+  $logger < $been_run_file
+  exit
+fi
+
+# Retrieve the instance user-data and run it if it looks like a script
+user_data_file=`mktemp -t ec2-user-data.XXXXXXXXXX`
+chmod 700 $user_data_file
+$logger "Retrieving user-data"
+$curl -o $user_data_file $instance_data_url/user-data 2>&1 | $logger
+if [ ! -s $user_data_file ]; then
+  $logger "No user-data available"
+elif head -1 $user_data_file | egrep -v '^#!'; then
+  $logger "Skipping user-data as it does not begin with #!"
+else
+  $logger "Running user-data"
+  echo "user-data has already been run on this instance" > $been_run_file
+  $user_data_file 2>&1 | logger -t "user-data"
+  $logger "user-data exit code: $?"
+fi
+rm -f $user_data_file

+ 42 - 0
common/src/contrib/ec2/bin/launch-hadoop-cluster

@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch an EC2 cluster of Hadoop instances.
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
+
+if ! "$bin"/launch-hadoop-master $1 ; then
+  exit $?
+fi
+
+if ! "$bin"/launch-hadoop-slaves $*; then
+  exit $?
+fi

+ 119 - 0
common/src/contrib/ec2/bin/launch-hadoop-master

@@ -0,0 +1,119 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch an EC2 Hadoop master.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [ -z $AWS_ACCOUNT_ID ]; then
+  echo "Please set AWS_ACCOUNT_ID in $bin/hadoop-ec2-env.sh."
+  exit -1
+fi
+
+echo "Testing for existing master in group: $CLUSTER"
+MASTER_EC2_HOST=`ec2-describe-instances | awk '"RESERVATION" == $1 && "'$CLUSTER_MASTER'" == $4, "RESERVATION" == $1 && "'$CLUSTER_MASTER'" != $4'`
+MASTER_EC2_HOST=`echo "$MASTER_EC2_HOST" | awk '"INSTANCE" == $1 && "running" == $6 {print $4}'`
+
+if [ ! -z "$MASTER_EC2_HOST" ]; then
+  echo "Master already running on: $MASTER_EC2_HOST"
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_EC2_HOST | awk '{print $5}'`
+  echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+  echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+  exit 0
+fi
+
+if ! ec2-describe-group $CLUSTER_MASTER > /dev/null 2>&1; then
+  echo "Creating group $CLUSTER_MASTER"
+  ec2-add-group $CLUSTER_MASTER -d "Group for Hadoop Master."
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER_MASTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER_MASTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER_MASTER -p 50060 # TaskTracker web interface
+    ec2-authorize $CLUSTER_MASTER -p 50070 # NameNode web interface
+    ec2-authorize $CLUSTER_MASTER -p 50075 # DataNode web interface
+  fi
+fi
+
+if ! ec2-describe-group $CLUSTER > /dev/null 2>&1; then
+  echo "Creating group $CLUSTER"
+  ec2-add-group $CLUSTER -d "Group for Hadoop Slaves."
+  ec2-authorize $CLUSTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -p 22    # ssh
+
+  if [ $ENABLE_WEB_PORTS == "true" ]; then
+    ec2-authorize $CLUSTER -p 50030 # JobTracker web interface
+    ec2-authorize $CLUSTER -p 50060 # TaskTracker web interface
+    ec2-authorize $CLUSTER -p 50070 # NameNode web interface
+    ec2-authorize $CLUSTER -p 50075 # DataNode web interface
+  fi
+
+  ec2-authorize $CLUSTER_MASTER -o $CLUSTER -u $AWS_ACCOUNT_ID
+  ec2-authorize $CLUSTER -o $CLUSTER_MASTER -u $AWS_ACCOUNT_ID
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH | grep available | awk '{print $2}'`
+
+# Start a master
+echo "Starting master with AMI $AMI_IMAGE"
+USER_DATA="MASTER_HOST=master,MAX_MAP_TASKS=$MAX_MAP_TASKS,MAX_REDUCE_TASKS=$MAX_REDUCE_TASKS,COMPRESS=$COMPRESS"
+INSTANCE=`ec2-run-instances $AMI_IMAGE -n 1 -g $CLUSTER_MASTER -k $KEY_NAME -f "$bin"/$USER_DATA_FILE -t $INSTANCE_TYPE $KERNEL_ARG | grep INSTANCE | awk '{print $2}'`
+echo "Waiting for instance $INSTANCE to start"
+while true; do
+  printf "."
+  # get private dns
+  MASTER_HOST=`ec2-describe-instances $INSTANCE | grep running | awk '{print $5}'`
+  if [ ! -z $MASTER_HOST ]; then
+    echo "Started as $MASTER_HOST"
+    break;
+  fi
+  sleep 1
+done
+
+MASTER_EC2_HOST=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $4}'`
+echo $MASTER_HOST > $MASTER_PRIVATE_IP_PATH
+echo $MASTER_EC2_HOST > $MASTER_IP_PATH
+MASTER_EC2_ZONE=`ec2-describe-instances $INSTANCE | grep INSTANCE | grep running | grep $MASTER_HOST | awk '{print $11}'`
+echo $MASTER_EC2_ZONE > $MASTER_ZONE_PATH
+
+while true; do
+  if ssh $SSH_OPTS "root@$MASTER_EC2_HOST" 'echo "hello"' > /dev/null 2>&1; then
+   break;
+  fi
+  sleep 5
+done
+
+echo "Copying private key to master"
+scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_EC2_HOST:/root/.ssh/id_rsa"
+ssh $SSH_OPTS "root@$MASTER_EC2_HOST" "chmod 600 /root/.ssh/id_rsa"
+
+MASTER_IP=`dig +short $MASTER_EC2_HOST`
+echo "Master is $MASTER_EC2_HOST, ip is $MASTER_IP, zone is $MASTER_EC2_ZONE."

+ 59 - 0
common/src/contrib/ec2/bin/launch-hadoop-slaves

@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Launch EC2 Hadoop slaves.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+if [ -z $2 ]; then
+  echo "Must specify the number of slaves to start."
+  exit -1
+fi
+
+CLUSTER=$1
+NO_INSTANCES=$2
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+if [ ! -f $MASTER_IP_PATH ]; then
+  echo "Must start Cluster Master first!"
+  exit -1
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep $ARCH |grep available | awk '{print $2}'`
+# to use private master hostname, substitute below with:
+# MASTER_HOST=`cat $MASTER_PRIVATE_IP_PATH`
+MASTER_HOST=`cat $MASTER_IP_PATH`
+MASTER_ZONE=`cat $MASTER_ZONE_PATH`
+
+# Substituting master hostname
+sed -e "s|%MASTER_HOST%|$MASTER_HOST|" "$bin"/$USER_DATA_FILE > "$bin"/$USER_DATA_FILE.slave
+
+# Start slaves
+echo "Adding $1 node(s) to cluster group $CLUSTER with AMI $AMI_IMAGE"
+ec2-run-instances $AMI_IMAGE -n "$NO_INSTANCES" -g "$CLUSTER" -k "$KEY_NAME" -f "$bin"/$USER_DATA_FILE.slave -t "$INSTANCE_TYPE" -z "$MASTER_ZONE" $KERNEL_ARG | grep INSTANCE | awk '{print $2}'
+
+rm "$bin"/$USER_DATA_FILE.slave

+ 33 - 0
common/src/contrib/ec2/bin/list-hadoop-clusters

@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# List running clusters.
+
+set -o errexit
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Finding Hadoop clusters
+CLUSTERS=`ec2-describe-instances | awk '"RESERVATION" == $1 && $4 ~ /-master$/, "INSTANCE" == $1' | tr '\n' '\t' | grep running | cut -f4 | rev | cut -d'-' -f2- | rev`
+
+[ -z "$CLUSTERS" ] && echo "No running clusters." && exit 0
+
+echo "Running Hadoop clusters:"
+echo "$CLUSTERS"

+ 48 - 0
common/src/contrib/ec2/bin/terminate-hadoop-cluster

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Terminate a cluster.
+
+set -o errexit
+
+if [ -z $1 ]; then
+  echo "Cluster name required!"
+  exit -1
+fi
+
+CLUSTER=$1
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+# Finding Hadoop image
+HADOOP_INSTANCES=`ec2-describe-instances | awk '"RESERVATION" == $1 && ("'$CLUSTER'" == $4 || "'$CLUSTER_MASTER'" == $4), "RESERVATION" == $1 && ("'$CLUSTER'" != $4 && "'$CLUSTER_MASTER'" != $4)'`
+HADOOP_INSTANCES=`echo "$HADOOP_INSTANCES" | grep INSTANCE | grep running`
+
+[ -z "$HADOOP_INSTANCES" ] && echo "No running instances in cluster $CLUSTER." && exit 0
+
+echo "Running Hadoop instances:"
+echo "$HADOOP_INSTANCES"
+read -p "Terminate all instances? [yes or no]: " answer
+
+if [ "$answer" != "yes" ]; then
+  exit 1
+fi
+
+ec2-terminate-instances `echo "$HADOOP_INSTANCES" | awk '{print $2}'`

+ 97 - 0
common/src/contrib/failmon/README

@@ -0,0 +1,97 @@
+****************** FailMon Quick Start Guide ***********************
+
+This document is a guide to quickly setting up and running FailMon.
+For more information and details please see the FailMon User Manual.
+
+***** Building FailMon *****
+
+Normally, FailMon lies under <hadoop-dir>/src/contrib/failmon, where
+<hadoop-dir> is the Hadoop project root folder. To compile it,
+one can either run ant for the whole Hadoop project, i.e.:
+
+$ cd <hadoop-dir>
+$ ant
+
+or run ant only for FailMon:
+
+$ cd <hadoop-dir>/src/contrib/failmon
+$ ant
+
+The above will compile FailMon and place all class files under
+<hadoop-dir>/build/contrib/failmon/classes.
+
+By invoking:
+
+$ cd <hadoop-dir>/src/contrib/failmon
+$ ant tar
+
+FailMon is packaged as a standalone jar application in
+<hadoop-dir>/src/contrib/failmon/failmon.tar.gz.
+
+
+***** Deploying FailMon *****
+
+There are two ways FailMon can be deployed in a cluster:
+
+a) Within Hadoop, in which case the whole Hadoop package is uploaded
+to the cluster nodes. In that case, nothing else needs to be done on
+individual nodes.
+
+b) Independently of the Hadoop deployment, i.e., by uploading
+failmon.tar.gz to all nodes and uncompressing it. In that case, the
+bin/failmon.sh script needs to be edited; environment variable
+HADOOPDIR should point to the root directory of the Hadoop
+distribution. Also, the location of the Hadoop configuration files
+should be pointed to by the property 'hadoop.conf.path' in the file
+conf/failmon.properties. Note that these files refer to the HDFS in
+which we want to store the FailMon data (which can potentially be
+different from the one on the cluster we are monitoring).
+
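+For example, a standalone deployment that keeps FailMon under
+/opt/failmon and uses a Hadoop installation under /opt/hadoop would
+require edits along these lines (the paths are purely illustrative):
+
+  # in bin/failmon.sh
+  HADOOPDIR=/opt/hadoop
+
+  # in conf/failmon.properties
+  hadoop.conf.path = /opt/hadoop/conf
+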
+We assume that either way FailMon is placed in the same directory on
+all nodes, which is typical for most clusters. If this is not
+feasible, one should create the same symbolic link on all nodes of the
+cluster that points to the FailMon directory of each node.
+
+One should also edit the conf/failmon.properties file on each node to
+set site-specific property values. However, the default values are expected
+to serve most practical cases. Refer to the FailMon User Manual about
+the various properties and configuration parameters.
+
+
+***** Running FailMon *****
+
+In order to run FailMon using a node to do the ad-hoc scheduling of
+monitoring jobs, one needs to edit the hosts.list file to specify the
+list of machine hostnames on which FailMon is to be run. Also, in file
+conf/global.config the username used to connect to the machines has to
+be specified (passwordless SSH is assumed) in property 'ssh.username'.
+In property 'failmon.dir', the path to the FailMon folder has to be
+specified as well (it is assumed to be the same on all machines in the
+cluster). Then one only needs to invoke the command:
+
+$ cd <hadoop-dir>
+$ bin/scheduler.py
+
+to start the system.
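+
+For instance, for a cluster whose nodes are reached as user 'hadoop'
+and keep FailMon under /home/hadoop/failmon, conf/global.config would
+contain, among its other properties (the username and path here are
+only illustrative):
+
+  ssh.username = hadoop
+  failmon.dir = /home/hadoop/failmon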
+
+
+***** Merging HDFS files *****
+
+For the purpose of merging the files created on HDFS by FailMon, the
+following command can be used:
+
+$ cd <hadoop-dir>
+$ bin/failmon.sh --mergeFiles
+
+This will concatenate all files in the HDFS folder (pointed to by the
+'hdfs.upload.dir' property in conf/failmon.properties file) into a
+single file, which will be placed in the same folder. Also the
+location of the Hadoop configuration files should be pointed to by the
+property 'hadoop.conf.path' in the file conf/failmon.properties. Note that
+these files refer to the HDFS in which we have stored the FailMon data
+(which can potentially be different from the one on the cluster we are
+monitoring). Also, the scheduler.py script can be set up to merge the
+HDFS files when their number surpasses a configurable limit (see
+'conf/global.config' file).
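+
+For example, to have the merge triggered once more than 50 files have
+accumulated on HDFS, one could set in conf/global.config (the value 50
+is only illustrative):
+
+  hdfs.files.max = 50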
+
+Please refer to the FailMon User Manual for more details.

+ 54 - 0
common/src/contrib/failmon/bin/failmon.sh

@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# First we need to determine whether FailMon has been distributed with
+# Hadoop, or as standalone. In the latter case failmon.jar will lie in
+# the current directory.
+
+JARNAME="failmon.jar"
+HADOOPDIR=""
+CLASSPATH=""
+
+if [ `ls -l | grep src | wc -l` == 0 ]
+then
+    # standalone binary
+    if [ -n $1 ] && [ "$1" == "--mergeFiles" ]
+    then
+	jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.HDFSMerger
+        java -jar $JARNAME
+    else
+    	jar -ufe $JARNAME org.apache.hadoop.contrib.failmon.RunOnce
+	java -jar $JARNAME $*
+    fi
+else
+    # distributed with Hadoop
+    HADOOPDIR=`pwd`/../../../
+    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/contrib/failmon/classes
+    CLASSPATH=$CLASSPATH:$HADOOPDIR/build/classes
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-api-1*.jar`
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/commons-logging-1*.jar`
+    CLASSPATH=$CLASSPATH:`ls -1 $HADOOPDIR/lib/log4j-*.jar`
+#    echo $CLASSPATH
+    if [ -n $1 ] && [ "$1" == "--mergeFiles" ]
+    then
+        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.HDFSMerger
+    else
+        java -cp $CLASSPATH org.apache.hadoop.contrib.failmon.RunOnce $*
+    fi
+fi
+

+ 235 - 0
common/src/contrib/failmon/bin/scheduler.py

@@ -0,0 +1,235 @@
+#!/usr/bin/python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Schedule FailMon execution for nodes of file hosts.list, according to
+# the properties file conf/global.config.
+
+import time
+import ConfigParser
+import subprocess
+import threading
+import random
+
+jobs = []
+username = "user"
+connections = 10
+failmonDir = ""
+maxFiles = 100
+
+# This class represents a thread that connects to a set of cluster
+# nodes to locally execute monitoring jobs. These jobs are specified
+# as a shell command in the constructor.
+class sshThread (threading.Thread):
+
+    def __init__(self, threadname, username, command, failmonDir):
+        threading.Thread.__init__(self)
+        self.name = threadname
+        self.username = username
+        self.command = command
+        self.failmonDir = failmonDir
+        self.hosts = []
+
+    def addHost(self, host):
+        self.hosts.append(host)
+        
+    def run (self):
+        for host in self.hosts:
+            toRun = ["ssh", self.username + "@" + host, "cd " + self.failmonDir + " ; " + self.command]
+            print "Thread", self.name, "invoking command on", host, ":\t", toRun, "...",
+            subprocess.check_call(toRun)
+            print "Done!"
+
+# This class represents a monitoring job. The param member is a string
+# that can be passed in the '--only' list of jobs given to the Java
+# class org.apache.hadoop.contrib.failmon.RunOnce for execution on a
+# node.
+class Job:
+    def __init__(self, param, interval):
+        self.param = param
+        self.interval = interval
+        self.counter = interval
+        return
+
+    def reset(self):
+        self.counter = self.interval
+
+# This function reads the configuration file to get the values of the
+# configuration parameters.
+def getJobs(file):
+    global username
+    global connections
+    global jobs
+    global failmonDir
+    global maxFiles
+    
+    conf = ConfigParser.SafeConfigParser()
+    conf.read(file)
+
+    username = conf.get("Default", "ssh.username")
+    connections = int(conf.get("Default", "max.connections"))
+    failmonDir = conf.get("Default", "failmon.dir")
+    maxFiles = conf.get("Default", "hdfs.files.max")
+    
+    # Hadoop Log
+    interval = int(conf.get("Default", "log.hadoop.interval"))
+
+    if interval != 0:
+        jobs.append(Job("hadoopLog", interval))
+
+    # System Log
+    interval = int(conf.get("Default", "log.system.interval"))
+
+    if interval != 0:
+        jobs.append(Job("systemLog", interval))
+
+    # NICs
+    interval = int(conf.get("Default", "nics.interval"))
+
+    if interval != 0:
+        jobs.append(Job("nics", interval))
+
+    # CPU
+    interval = int(conf.get("Default", "cpu.interval"))
+
+    if interval != 0:
+        jobs.append(Job("cpu", interval))
+
+    # Disks
+    interval = int(conf.get("Default", "disks.interval"))
+
+    if interval != 0:
+        jobs.append(Job("disks", interval))
+
+    # sensors
+    interval = int(conf.get("Default", "sensors.interval"))
+
+    if interval != 0:
+        jobs.append(Job("sensors", interval))
+
+    # upload
+    interval = int(conf.get("Default", "upload.interval"))
+
+    if interval != 0:
+        jobs.append(Job("upload", interval))
+
+    return
+
+
+# Compute the gcd (Greatest Common Divisor) of two integers
+def GCD(a, b):
+    assert isinstance(a, int)
+    assert isinstance(b, int)
+
+    while a:
+        a, b = b%a, a
+
+    return b
+
+# Compute the gcd (Greatest Common Divisor) of a list of integers
+def listGCD(joblist):
+    assert isinstance(joblist, list)
+
+    if (len(joblist) == 1):
+        return joblist[0].interval
+
+    g = GCD(joblist[0].interval, joblist[1].interval)
+
+    for i in range (2, len(joblist)):
+        g = GCD(g, joblist[i].interval)
+        
+    return g
+
+# Merge all failmon files created on the HDFS into a single file
+def mergeFiles():
+    global username
+    global failmonDir
+    hostList = []
+    hosts = open('./conf/hosts.list', 'r')
+    for host in hosts:
+        hostList.append(host.strip().rstrip())
+    randomHost = random.sample(hostList, 1)
+    mergeCommand = "bin/failmon.sh --mergeFiles"
+    toRun = ["ssh", username + "@" + randomHost[0], "cd " + failmonDir + " ; " + mergeCommand]
+    print "Invoking command on", randomHost, ":\t", mergeCommand, "...",
+    subprocess.check_call(toRun)
+    print "Done!"
+    return
+
+# The actual scheduling is done here
+def main():
+    getJobs("./conf/global.config")
+
+    for job in jobs:
+        print "Configuration: ", job.param, "every", job.interval, "seconds"
+        
+    globalInterval = listGCD(jobs)
+        
+    while True :
+        time.sleep(globalInterval)
+        params = []
+        
+        for job in jobs:
+            job.counter -= globalInterval
+            
+            if (job.counter <= 0):
+                params.append(job.param)
+                job.reset()
+                
+        if (len(params) == 0):
+            continue;
+                    
+        onlyStr = "--only " + params[0]
+        for i in range(1, len(params)):
+            onlyStr += ',' + params[i] 
+                
+        command = "bin/failmon.sh " + onlyStr
+
+        # execute on all nodes
+        hosts = open('./conf/hosts.list', 'r')
+        threadList = []
+        # create a thread for every connection
+        for i in range(0, connections):
+            threadList.append(sshThread(i, username, command, failmonDir))
+
+        # assign some hosts/connections hosts to every thread
+        cur = 0;
+        for host in hosts:
+            threadList[cur].addHost(host.strip().rstrip())
+            cur += 1
+            if (cur == len(threadList)):
+                cur = 0    
+
+        for ready in threadList:
+            ready.start()
+
+        for ssht in threading.enumerate():
+            if ssht != threading.currentThread():
+                ssht.join()
+
+        # if an upload has been done, then maybe we need to merge the
+        # HDFS files
+        if "upload" in params:
+            mergeFiles()
+
+    return
+
+
+if __name__ == '__main__':
+    main()
+

+ 120 - 0
common/src/contrib/failmon/build.xml

@@ -0,0 +1,120 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="failmon" default="compile">
+
+  <import file="../build-contrib.xml"/>
+
+  <property name="jarfile" value="${build.dir}/${name}.jar"/>
+
+  <target name="jar" depends="compile" unless="skip.contrib">
+    <!-- Make sure that the hadoop jar has been created -->
+<!-- This works, but causes findbugs to fail
+    <subant antfile="build.xml" target="jar">
+      <fileset dir="../../.." includes="build.xml"/>
+    </subant>
+-->
+    <!-- Copy the required files so that the jar can run independently
+	 of Hadoop source code -->
+    
+  <!-- create the list of files to add to the classpath -->
+  <fileset dir="${hadoop.root}/lib" id="class.path">
+    <include name="**/*.jar" />
+    <exclude name="**/excluded/" />
+  </fileset>
+  
+  <pathconvert pathsep=" " property="failmon-class-path" refid="class.path">
+    <map from="${basedir}/" to=""/>
+  </pathconvert>
+
+    <echo message="contrib: ${name}"/>
+    <jar jarfile="${jarfile}" basedir="${build.classes}">
+      <manifest>
+        <attribute name="Main-Class" value="org.apache.hadoop.contrib.failmon.RunOnce"/>
+	<attribute name="Class-Path" value="${failmon-class-path}"/> 
+      </manifest>
+    </jar>
+
+  </target>
+
+  
+  <!-- Override test target to copy sample data -->
+  <target name="test" depends="compile-test, compile, compile-examples" if="test.available">
+    <echo message="contrib: ${name}"/>
+    <delete dir="${hadoop.log.dir}"/>
+    <mkdir dir="${hadoop.log.dir}"/>
+    <delete dir="${build.test}/sample"/>
+    <mkdir dir="${build.test}/sample"/>
+    <copy todir="${build.test}/sample">
+      <fileset dir="${root}/sample"/>
+    </copy>
+    <junit
+      printsummary="yes" showoutput="${test.output}" 
+      haltonfailure="no" fork="yes" maxmemory="256m"
+      errorProperty="tests.failed" failureProperty="tests.failed"
+      timeout="${test.timeout}">
+      
+      <sysproperty key="test.build.data" value="${build.test}/data"/>
+      <sysproperty key="build.test" value="${build.test}"/>
+      <sysproperty key="contrib.name" value="${name}"/>
+      
+      <!-- requires fork=yes for: 
+        relative File paths to use the specified user.dir 
+        classpath to use build/contrib/*.jar
+      -->
+      <sysproperty key="user.dir" value="${build.test}/data"/>
+      
+      <sysproperty key="fs.default.name" value="${fs.default.name}"/>
+      <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
+      <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/>
+      <classpath refid="test.classpath"/>
+      <formatter type="${test.junit.output.format}" />
+      <batchtest todir="${build.test}" unless="testcase">
+        <fileset dir="${src.test}"
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+      </batchtest>
+      <batchtest todir="${build.test}" if="testcase">
+        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+      </batchtest>
+    </junit>
+    <fail if="tests.failed">Tests failed!</fail>
+
+  </target>
+  
+  <target name="tar" depends="jar">
+
+    <copy todir=".">
+      <fileset dir="${hadoop.root}/build/contrib/failmon/"
+	       includes="failmon.jar"/>
+    </copy>
+    
+    <tar tarfile="${name}.tar" 
+	 basedir=".." 
+	 includes="${name}/**"
+	 excludes="${name}/${name}.tar.gz, ${name}/src/**, ${name}/logs/**, ${name}/build.xml*"/>
+    <gzip zipfile="${name}.tar.gz" src="${name}.tar"/>
+    <delete file="${name}.tar"/>
+    <delete file="${name}.jar"/>
+
+    <move file="${name}.tar.gz" todir="${build.dir}"/>
+    <echo message= "${hadoop.root}/build/contrib/failmon/${name}.jar"/>
+    
+  </target>
+  
+</project>

+ 25 - 0
common/src/contrib/failmon/conf/commons-logging.properties

@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#      http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#Logging Implementation
+
+#Log4J
+org.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger
+
+#JDK Logger
+#org.apache.commons.logging.Log=org.apache.commons.logging.impl.Jdk14Logger

+ 80 - 0
common/src/contrib/failmon/conf/failmon.properties

@@ -0,0 +1,80 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# HDFS properties
+hdfs.upload.dir = /failmon
+hadoop.conf.path = ../../../conf
+
+# Hadoop Log file properties
+log.hadoop.enabled = true
+log.hadoop.filenames = /home/hadoop/hadoop-0.17.0/logs/
+# set to non-zero only for continuous mode:
+log.hadoop.interval = 0
+log.hadoop.dateformat = \\d{4}-\\d{2}-\\d{2}
+log.hadoop.timeformat = \\d{2}:\\d{2}:\\d{2}
+
+# System Log file properties
+log.system.enabled = true
+log.system.filenames = /var/log/messages
+# set to non-zero only for continuous mode:
+log.system.interval = 0
+log.system.dateformat = (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)
+log.system.timeformat = \\d{2}:\\d{2}:\\d{2}
+
+# Network Interfaces
+nic.enabled = true
+nic.list = eth0, eth1
+# set to non-zero only for continuous mode:
+nic.interval = 0
+
+# CPUs & Motherboard
+cpu.enabled = true
+# set to non-zero only for continuous mode:
+cpu.interval = 0
+
+# Disk devices. For all devices listed under disks.list, the corresponding
+# property disk./dev/xxx.source specifies where the output of 
+# "sudo smartctl --all /dev/xxx" can be read by a user. If this property is
+# missing, super-user privileges are assumed and the smartctl command will be 
+# invoked itself.
+
+disks.enabled = true
+disks.list = /dev/sda, /dev/sdb, /dev/sdc, /dev/sdd, /dev/hda, /dev/hdb, /dev/hdc, /dev/hdd
+#disks./dev/sda.source = hda.smart
+# set to non-zero only for continuous mode:
+disks.interval = 0
+
+# lm-sensors polling
+sensors.enabled = true
+# set to non-zero only for continuous mode:
+sensors.interval = 0
+
+# Executor thread properties	
+executor.interval.min = 1	
+
+# Anonymization properties
+anonymizer.hash.hostnames = false
+anonymizer.hash.ips = false
+anonymizer.hash.filenames = false
+anonymizer.hostname.suffix = apache.org
+
+# Local files options
+local.tmp.filename = failmon.dat
+local.tmp.compression = false
+# set to non-zero only for continuous mode:
+local.upload.interval = 0

+ 39 - 0
common/src/contrib/failmon/conf/global.config

@@ -0,0 +1,39 @@
+[Default]
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# general settings
+
+# the username to use to connect to cluster nodes
+ssh.username = user
+# the maximum number of SSH connections to keep open at any time
+max.connections = 2
+# the directory in which FailMon resides
+failmon.dir = /home/user/hadoop-core-trunk/src/contrib/failmon
+# the maximum number of HDFS files FailMon is allowed to create. Once
+# this limit is exceeded, all HDFS files will be concatenated into
+# one file.
+hdfs.files.max = 100
+
+# iteration intervals
+log.hadoop.interval = 0
+log.system.interval = 0
+nics.interval = 10
+cpu.interval = 10
+disks.interval = 0
+sensors.interval = 0
+upload.interval = 20

+ 10 - 0
common/src/contrib/failmon/conf/hosts.list

@@ -0,0 +1,10 @@
+host00
+host01
+host02
+host03
+host04
+host05
+host06
+host07
+host08
+host09

+ 40 - 0
common/src/contrib/failmon/conf/log4j.properties

@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Define some default values that can be overridden by system properties
+failmon.log.dir=logs
+failmon.log.file=failmon.log
+
+log4j.rootLogger= INFO, simpleFile, console
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# console
+# "console" is already attached to the rootLogger above; remove it there to disable console output
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+log4j.appender.simpleFile=org.apache.log4j.FileAppender
+log4j.appender.simpleFile.layout=org.apache.log4j.PatternLayout
+log4j.appender.simpleFile.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+log4j.appender.simpleFile.file= ${failmon.log.dir}/${failmon.log.file}

+ 52 - 0
common/src/contrib/failmon/ivy.xml

@@ -0,0 +1,52 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Apache Hadoop
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact" />
+
+    <conf name="common" visibility="private" 
+      extends="runtime"
+      description="artifacts needed to compile/test the application"/>
+    <conf name="test" visibility="private" extends="runtime"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="common->default"/>
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="common->master"/>
+  </dependencies>
+</ivy-module>

+ 17 - 0
common/src/contrib/failmon/ivy/libraries.properties

@@ -0,0 +1,17 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by failmon.
+#It drives ivy and the generation of a maven POM
+
+#Please list the dependency names with versions if they differ from the ones
+#listed in the global libraries.properties file (in alphabetical order)

+ 154 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Anonymizer.java

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.UnsupportedEncodingException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+/**********************************************************
+ * This class provides anonymization for SerializedRecord objects. It
+ * anonymizes all hostnames, IP addresses and file names/paths
+ * that appear in EventRecords gathered from the logs
+ * and other system utilities. Such values are hashed using the
+ * MD5 one-way hash algorithm.
+ * 
+ **********************************************************/
+
+public class Anonymizer {
+
+  /**
+   * Anonymize hostnames, IP addresses and file names/paths
+   * that appear in fields of a SerializedRecord.
+   *
+   * @param sr the input SerializedRecord
+   *
+   * @return the anonymized SerializedRecord
+   */
+  public static SerializedRecord anonymize(SerializedRecord sr)
+      throws Exception {
+
+    String hostname = sr.get("hostname");
+
+    if (hostname == null)
+      throw new Exception("Malformed SerializedRecord: no hostname found");
+
+    if ("true".equalsIgnoreCase(Environment
+        .getProperty("anonymizer.hash.hostnames"))) {
+      // hash the node's hostname
+      anonymizeField(sr, "message", hostname, "_hn_");
+      anonymizeField(sr, "hostname", hostname, "_hn_");
+      // hash all other hostnames
+      String suffix = Environment.getProperty("anonymizer.hostname.suffix");
+      if (suffix != null)
+        anonymizeField(sr, "message", "(\\S+\\.)*" + suffix, "_hn_");
+    }
+
+    if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.ips"))) {
+      // hash all ip addresses
+      String ipPattern = "(\\d{1,3}\\.){3}\\d{1,3}";
+      anonymizeField(sr, "message", ipPattern, "_ip_");
+      anonymizeField(sr, "ips", ipPattern, "_ip_");
+      // if multiple ips are present for a node:
+      int i = 0;
+      while (sr.get("ips" + "#" + i) != null)
+        anonymizeField(sr, "ips" + "#" + i++, ipPattern, "_ip_");
+
+      if ("NIC".equalsIgnoreCase(sr.get("type")))
+        anonymizeField(sr, "ipAddress", ipPattern, "_ip_");
+    }
+
+    if ("true".equalsIgnoreCase(Environment
+        .getProperty("anonymizer.hash.filenames"))) {
+      // hash every filename present in messages
+      anonymizeField(sr, "message", "\\s+/(\\S+/)*[^:\\s]*", " _fn_");
+      anonymizeField(sr, "message", "\\s+hdfs://(\\S+/)*[^:\\s]*",
+          " hdfs://_fn_");
+    }
+
+    return sr;
+  }
+
+  /**
+   * Anonymize hostnames, ip addresses and file names/paths
+   * that appear in fields of an EventRecord, after it gets
+   * serialized into a SerializedRecord.
+   * 
+   * @param er the input EventRecord
+   * 
+   * @return the anonymized SerializedRecord
+   */   
+  public static SerializedRecord anonymize(EventRecord er) throws Exception {
+    return anonymize(new SerializedRecord(er));
+  }
+
+  
+  private static String anonymizeField(SerializedRecord sr, String fieldName,
+      String pattern, String prefix) {
+    String txt = sr.get(fieldName);
+
+    if (txt == null)
+      return null;
+    else {
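+      // note: the replacement token is the MD5 hash of the pattern itself, so all matches of a given pattern map to the same anonymized value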
+      String anon = getMD5Hash(pattern);
+      sr.set(fieldName, txt.replaceAll(pattern, (prefix == null ? "" : prefix)
+          + anon));
+      return txt;
+    }
+  }
+
+  /**
+   * Create the MD5 digest of an input text.
+   * 
+   * @param text the input text
+   * 
+   * @return the hexadecimal representation of the MD5 digest
+   */   
+  public static String getMD5Hash(String text) {
+    MessageDigest md;
+    byte[] md5hash = new byte[32];
+    try {
+      md = MessageDigest.getInstance("MD5");
+      md.update(text.getBytes("iso-8859-1"), 0, text.length());
+      md5hash = md.digest();
+    } catch (NoSuchAlgorithmException e) {
+      e.printStackTrace();
+    } catch (UnsupportedEncodingException e) {
+      e.printStackTrace();
+    }
+    return convertToHex(md5hash);
+  }
+
+  private static String convertToHex(byte[] data) {
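+    // render each byte as two lowercase hex digits, high nibble first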
+    StringBuilder buf = new StringBuilder();
+    for (int i = 0; i < data.length; i++) {
+      int halfbyte = (data[i] >>> 4) & 0x0F;
+      int two_halfs = 0;
+      do {
+        if ((0 <= halfbyte) && (halfbyte <= 9))
+          buf.append((char) ('0' + halfbyte));
+        else
+          buf.append((char) ('a' + (halfbyte - 10)));
+        halfbyte = data[i] & 0x0F;
+      } while (two_halfs++ < 1);
+    }
+    return buf.toString();
+  }
+
+}

+ 101 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/CPUParser.java

@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.Calendar;
+
+/**********************************************************
+ * Objects of this class parse the /proc/cpuinfo file to 
+ * gather information about present processors in the system.
+ *
+ **********************************************************/
+
+
+public class CPUParser extends ShellParser {
+
+  /**
+   * Constructs a CPUParser
+   */
+  public CPUParser() {
+    super();
+  }
+
+  /**
+   * Reads and parses /proc/cpuinfo and creates an appropriate 
+   * EventRecord that holds the desired information.
+   * 
+   * @param s unused parameter
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String s) throws Exception {
+    CharSequence sb = Environment.runCommandGeneric("cat /proc/cpuinfo");
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "CPU", "Unknown", "CPU", "-");
+
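+    // each call to findAll joins all matches with ", "; e.g. "processors" might read "0, 1" on a two-core machine (illustrative values)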
+    retval.set("processors", findAll("\\s*processor\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("model name", findPattern("\\s*model name\\s*:\\s*(.+)", sb
+        .toString(), 1));
+
+    retval.set("frequency", findAll("\\s*cpu\\s*MHz\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("physical id", findAll("\\s*physical\\s*id\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    retval.set("core id", findAll("\\s*core\\s*id\\s*:\\s*(\\d+)", sb
+        .toString(), 1, ", "));
+
+    return retval;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of /proc/cpuinfo
+   */
+  
+  public EventRecord[] monitor() {
+
+    EventRecord[] recs = new EventRecord[1];
+
+    try {
+      recs[0] = query(null);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return recs;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("CPU Info parser");
+  }
+
+}

+ 41 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Continuous.java

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+
+/**********************************************************
+ * This class runs FailMon in a continuous mode on the local
+ * node.
+ * 
+ **********************************************************/
+
+public class Continuous {
+
+  public static void main(String[] args) {
+
+
+    Environment.prepare("failmon.properties");
+
+    Executor ex = new Executor(null);
+    new Thread(ex).start();
+
+  }
+
+}

+ 486 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Environment.java

@@ -0,0 +1,486 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Properties;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.*;
+import org.apache.log4j.PropertyConfigurator;
+
+/**********************************************************
+ * This class provides various methods for interaction with
+ * the configuration and the operating system environment. Also
+ * provides some helper methods for use by other classes in
+ * the package.
+ **********************************************************/
+
+public class Environment {
+
+  public static final int DEFAULT_LOG_INTERVAL = 3600;
+
+  public static final int DEFAULT_POLL_INTERVAL = 360;
+
+  public static int MIN_INTERVAL = 5;
+
+  public static final int MAX_OUTPUT_LENGTH = 51200;
+
+  public static Log LOG;
+  
+  static Properties fmProperties = new Properties();
+
+  static boolean superuser = false;
+
+  static boolean ready = false;
+
+  /**
+   * Initializes structures needed by other methods. Also determines
+   * whether the executing user has superuser privileges. 
+   *  
+   */
+  public static void prepare(String fname) {
+
+    if (!"Linux".equalsIgnoreCase(System.getProperty("os.name"))) {
+      System.err.println("Linux system required for FailMon. Exiting...");
+      System.exit(0);
+    }
+
+    System.setProperty("log4j.configuration", "conf/log4j.properties");
+    PropertyConfigurator.configure("conf/log4j.properties");
+    LOG = LogFactory.getLog("org.apache.hadoop.contrib.failmon");
+    logInfo("********** FailMon started ***********");
+
+    // read parseState file
+    PersistentState.readState("conf/parsing.state");
+    
+    try {
+      FileInputStream propFile = new FileInputStream(fname);
+      fmProperties.load(propFile);
+      propFile.close();
+    } catch (FileNotFoundException e1) {
+      e1.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    ready = true;
+
+    try {
+      String sudo_prompt = "passwd_needed:";
+      String echo_txt = "access_ok";
+      
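+      // probe for passwordless sudo: run "sudo -S -p <prompt> echo <token>"; the token appearing on stdout means superuser rights, the prompt appearing on stderr means none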
+      Process p = Runtime.getRuntime().exec("sudo -S -p " + sudo_prompt + " echo " + echo_txt );
+      InputStream inps = p.getInputStream();
+      InputStream errs = p.getErrorStream();
+      
+      while (inps.available() < echo_txt.length() && errs.available() < sudo_prompt.length())
+	Thread.sleep(100);
+
+      byte [] buf;
+      String s;
+      
+      if (inps.available() >= echo_txt.length()) {
+        buf = new byte[inps.available()];
+        inps.read(buf);
+        s = new String(buf);
+        if (s.startsWith(echo_txt)) {
+          superuser = true;
+	  logInfo("Superuser privileges found!");
+	} else {
+	  // no need to read errs
+	  superuser = false;
+	  logInfo("Superuser privileges not found.");
+	}
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (InterruptedException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Fetches the value of a property from the configuration file.
+   * 
+   *  @param key the name of the property
+   *  
+   *  @return the value of the property, if it exists and
+   *  null otherwise
+   */
+  public static String getProperty(String key) {
+    if (!ready)
+      prepare("conf/failmon.properties");
+    return fmProperties.getProperty(key);
+  }
+
+  /**
+   * Sets the value of a property in the configuration file.
+   * 
+   *  @param key the name of the property
+   *  @param value the new value for the property
+   *  
+   */
+  
+  public static void setProperty(String key, String value) {
+    fmProperties.setProperty(key, value);
+  }
+
+  /**
+   * Scans the configuration file to determine which monitoring
+   * utilities are available in the system. For each one of them, a
+   * job is created. All such jobs are scheduled and executed by
+   * Executor.
+   * 
+   * @return an ArrayList that contains the jobs to be executed by the Executor.
+   */
+  public static ArrayList<MonitorJob> getJobs() {
+
+    ArrayList<MonitorJob> monitors = new ArrayList<MonitorJob>();
+    int timeInt = 0;
+
+    // for Hadoop Log parsing
+    String [] fnames_r = getProperty("log.hadoop.filenames").split(",\\s*");
+    String tmp = getProperty("log.hadoop.enabled");
+
+    String [] fnames = expandDirs(fnames_r, ".*(.log).*");
+
+    timeInt = setValue("log.hadoop.interval", DEFAULT_LOG_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp) && fnames[0] != null)
+      for (String fname : fnames) {
+        File f = new File(fname);
+        if (f.exists() && f.canRead()) {
+          monitors.add(new MonitorJob(new HadoopLogParser(fname), "hadoopLog", timeInt));
+	  logInfo("Created Monitor for Hadoop log file: " + f.getAbsolutePath());
+	} else if (!f.exists())
+	  logInfo("Skipping Hadoop log file " + fname + " (file not found)");
+	else
+	  logInfo("Skipping Hadoop log file " + fname + " (permission denied)");
+    }
+    
+    
+    // for System Log parsing
+    fnames_r = getProperty("log.system.filenames").split(",\\s*");
+    tmp = getProperty("log.system.enabled");
+
+    fnames = expandDirs(fnames_r, ".*(messages).*");
+
+    timeInt = setValue("log.system.interval", DEFAULT_LOG_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp))
+      for (String fname : fnames) {
+        File f = new File(fname);
+        if (f.exists() && f.canRead()) {
+          monitors.add(new MonitorJob(new SystemLogParser(fname), "systemLog", timeInt));
+	  logInfo("Created Monitor for System log file: " + f.getAbsolutePath());
+        } else if (!f.exists())
+	  logInfo("Skipping system log file " + fname + " (file not found)");
+	else
+	  logInfo("Skipping system log file " + fname + " (permission denied)");
+      }
+        
+
+    // for network interfaces
+    tmp = getProperty("nic.enabled");
+
+    timeInt = setValue("nics.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      monitors.add(new MonitorJob(new NICParser(), "nics", timeInt));
+      logInfo("Created Monitor for NICs");
+    }
+
+    // for cpu
+    tmp = getProperty("cpu.enabled");
+
+    timeInt = setValue("cpu.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      monitors.add(new MonitorJob(new CPUParser(), "cpu", timeInt));
+      logInfo("Created Monitor for CPUs");
+    }
+
+    // for disks
+    tmp = getProperty("disks.enabled");
+
+    timeInt = setValue("disks.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp)) {
+      // check privileges if a disk with no disks./dev/xxx.source is found
+      boolean smart_present = checkExistence("smartctl");
+      int disks_ok = 0;
+      String devicesStr = getProperty("disks.list");
+      String[] devices = new String[0]; // empty default keeps the loops below safe when disks.list is unset
+
+      if (devicesStr != null)
+        devices = devicesStr.split(",\\s*");
+      
+      for (int i = 0; i< devices.length; i++) {
+        boolean file_present = false;
+        boolean disk_present = false;
+        
+        String fileloc = getProperty("disks." + devices[i] + ".source");
+        if (fileloc != null && fileloc.equalsIgnoreCase("true"))
+          file_present = true;
+        
+        if (!file_present) 
+          if (superuser) {
+              CharSequence sb = runCommandGeneric("sudo smartctl -i " + devices[i]);
+              String patternStr = "[(failed)(device not supported)]";
+              Pattern pattern = Pattern.compile(patternStr);
+              Matcher matcher = pattern.matcher(sb.toString());
+              if (matcher.find(0))
+                disk_present = false;
+              else
+                disk_present = true;            
+          }
+        if (file_present || (disk_present && smart_present)) {
+          disks_ok++;
+        } else
+          devices[i] = null;
+      } 
+      
+      // now remove disks that dont exist
+      StringBuilder resetSB = new StringBuilder();
+      for (int j = 0; j < devices.length; j++) {
+        resetSB.append(devices[j] == null ? "" : devices[j] + ", ");
+	if (devices[j] != null)
+	    logInfo("Found S.M.A.R.T. attributes for disk " + devices[j]);
+      }
+      // fix the property
+      if (resetSB.length() >= 2)
+        setProperty("disks.list", resetSB.substring(0, resetSB.length() - 2));
+      
+      if (disks_ok > 0) {
+        monitors.add(new MonitorJob(new SMARTParser(), "disks", timeInt));
+	logInfo("Created Monitor for S.M.A.R.T disk attributes");
+      }
+    }
+
+    // for lm-sensors
+    tmp = getProperty("sensors.enabled");
+
+    timeInt = setValue("sensors.interval", DEFAULT_POLL_INTERVAL);
+    
+    if ("true".equalsIgnoreCase(tmp) && checkExistence("sensors")) {
+      monitors.add(new MonitorJob(new SensorsParser(), "sensors", timeInt));
+      logInfo("Created Monitor for lm-sensors output");
+    }
+
+    return monitors;
+  }
+
+  /**
+   * Determines the minimum interval at which the executor thread
+   * needs to wake up to execute jobs. Essentially, this interval
+   * equals the GCD of the intervals of all scheduled jobs.
+   * 
+   *  @param monitors the list of scheduled jobs
+   *  
+   *  @return the minimum interval between two scheduled jobs
+   */
+  public static int getInterval(ArrayList<MonitorJob> monitors) {
+    String tmp = getProperty("executor.interval.min");
+    if (tmp != null)
+      MIN_INTERVAL = Integer.parseInt(tmp);
+
+    int[] monIntervals = new int[monitors.size()];
+
+    for (int i = 0; i < monitors.size(); i++)
+      monIntervals[i] = monitors.get(i).interval;
+
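+    // e.g. job intervals of 10, 15 and 20 seconds give gcd = 5, so the thread wakes up every 5 seconds (the result is never smaller than MIN_INTERVAL)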
+    return Math.max(MIN_INTERVAL, gcd(monIntervals));
+  }
+
+  /**
+   * Checks whether a specific shell command is available
+   * in the system. 
+   * 
+   *  @param cmd the command to check against
+   *
+   *  @return true, if the command is available, false otherwise
+   */
+  public static boolean checkExistence(String cmd) {
+    CharSequence sb = runCommandGeneric("which " + cmd);
+    if (sb.length() > 1)
+      return true;
+
+    return false;
+  }
+
+  /**
+   * Runs a shell command in the system and provides a StringBuffer
+   * with the output of the command.
+   * <p>This method is deprecated. See the related method that returns a CharSequence as opposed to a StringBuffer.
+   * 
+   *  @param cmd an array of string that form the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   *  @see #runCommandGeneric(String[])
+   *  @deprecated
+   */
+  public static StringBuffer runCommand(String[] cmd) {
+    return new StringBuffer(runCommandGeneric(cmd));
+  }
+
+  /**
+   * Runs a shell command in the system and provides a CharSequence
+   * with the output of the command.
+   * 
+   *  @param cmd an array of string that form the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   */
+  public static CharSequence runCommandGeneric(String[] cmd) {
+    StringBuilder retval = new StringBuilder(MAX_OUTPUT_LENGTH);
+    Process p;
+    try {
+      p = Runtime.getRuntime().exec(cmd);
+      InputStream tmp = p.getInputStream();
+      p.waitFor();
+      int c;
+      while ((c = tmp.read()) != -1)
+        retval.append((char) c);
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (InterruptedException e) {
+      e.printStackTrace();
+    }
+
+    return retval;
+  }
+  
+  /**
+   * Runs a shell command in the system and provides a StringBuffer
+   * with the output of the command.
+   * <p>This method is deprecated in favor of the one that returns a CharSequence as opposed to a StringBuffer.
+   *  @param cmd the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   *  @see #runCommandGeneric(String)
+   *  @deprecated
+   */
+  public static StringBuffer runCommand(String cmd) {
+    return new StringBuffer(runCommandGeneric(cmd));
+  }
+
+  /**
+   * Runs a shell command in the system and provides a CharSequence
+   * with the output of the command.
+   * 
+   *  @param cmd the command to run 
+   *  
+   *  @return a text that contains the output of the command 
+   */
+  public static CharSequence runCommandGeneric(String cmd) {
+    return runCommandGeneric(cmd.split("\\s+"));
+  }
+
+  /**
+   * Determines the greatest common divisor (GCD) of two integers.
+   * 
+   *  @param m the first integer
+   *  @param n the second integer
+   *  
+   *  @return the greatest common divisor of m and n
+   */
+  public static int gcd(int m, int n) {
+    if (m == 0 && n == 0)
+      return 0;
+    if (m < n) {
+      int t = m;
+      m = n;
+      n = t;
+    }
+    int r = m % n;
+    if (r == 0) {
+      return n;
+    } else {
+      return gcd(n, r);
+    }
+  }
+
+  /**
+   * Determines the greatest common divisor (GCD) of a list
+   * of integers.
+   * 
+   *  @param numbers the list of integers to process
+   *  
+   *  @return the greatest common divisor of all numbers
+   */
+  public static int gcd(int[] numbers) {
+
+    if (numbers.length == 1)
+      return numbers[0];
+
+    int g = gcd(numbers[0], numbers[1]);
+
+    for (int i = 2; i < numbers.length; i++)
+      g = gcd(g, numbers[i]);
+
+    return g;
+  }
+
+  private static String [] expandDirs(String [] input, String patternStr) {
+
+    ArrayList<String> fnames = new ArrayList<String>();
+    Pattern pattern = Pattern.compile(patternStr);
+    Matcher matcher;
+    File f;
+    
+    for (String fname : input) {
+      f = new File(fname);
+      if (f.exists()) {
+	if (f.isDirectory()) {
+	  // add all matching files
+	  File [] fcs = f.listFiles();
+	  for (File fc : fcs) {
+	    matcher = pattern.matcher(fc.getName());
+	    if (matcher.find() && fc.isFile())
+	      fnames.add(fc.getAbsolutePath());
+	  }
+	} else {
+	  // normal file, just add to output
+	  fnames.add(f.getAbsolutePath());
+	}
+      }
+    }
+    return fnames.toArray(input);
+  }
+
+  private static int setValue(String propname, int defaultValue) {
+
+    String v = getProperty(propname);
+
+    if (v != null)
+      return Integer.parseInt(v);
+    else
+      return defaultValue;
+  }
+
+  
+  public static void logInfo(String str) {
+    LOG.info(str);
+  }
+}

+ 151 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/EventRecord.java

@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.HashMap;
+
+/**********************************************************
+ * Objects of this class represent metrics collected for 
+ * a specific hardware source. Each EventRecord contains a HashMap of 
+ * (key, value) pairs, each of which represents a property of
+ * the metered value. For instance, when parsing a log file, an
+ * EventRecord is created for each log entry, which contains 
+ * the hostname and the ip addresses of the node, timestamp of
+ * the log entry, the actual message etc. Each and every EventRecord
+ * contains the hostname of the machine on which it was collected,
+ * its IP address and the time of collection.
+ * 
+ * The main purpose of this class is to provide a uniform format
+ * for records collected from various system components (logs,
+ * ifconfig, smartmontools, lm-sensors etc). All metric values are 
+ * converted into this format after they are collected by a
+ * Monitored object.
+ *
+ **********************************************************/
+
+public class EventRecord {
+
+  HashMap<String, Object> fields;
+
+  /**
+   * Create the EventRecord given the most common properties
+   * among different metric types.
+   */
+  public EventRecord(String _hostname, Object [] _ips, Calendar _timestamp,
+      String _type, String _logLevel, String _source, String _message) {
+    fields = new HashMap<String, Object>();
+    fields.clear();
+    set("hostname", _hostname);
+    set("ips", _ips);
+    set("timestamp", _timestamp);
+    set("type", _type);
+    set("logLevel", _logLevel);
+    set("source", _source);
+    set("message", _message);
+  }
+
+  /**
+   * Create the EventRecord with no fields other than "invalid" as
+   * the hostname. This is only used as a dummy.
+   */
+  public EventRecord() {
+    // creates an invalid record
+    fields = new HashMap<String, Object>();
+    fields.clear();
+    set("hostname", "invalid");
+  }
+
+  /**
+   * Return the HashMap of properties of the EventRecord.
+   * 
+   * @return a HashMap that contains all properties of the record.
+   */
+  public final HashMap<String, Object> getMap() {
+    return fields;
+  }
+
+  /**
+   * Set the value of a property of the EventRecord.
+   * 
+   * @param fieldName the name of the property to set
+   * @param fieldValue the value of the property to set
+   * 
+   */
+  public void set(String fieldName, Object fieldValue) {
+    if (fieldValue != null)
+      fields.put(fieldName, fieldValue);
+  }
+
+  /**
+   * Get the value of a property of the EventRecord.
+   * If the property with the specific key is not found,
+   * null is returned.
+   * 
+   * @param fieldName the name of the property to get.
+   */
+  public Object get(String fieldName) {
+    return fields.get(fieldName);
+  }
+
+  /**
+   * Check if the EventRecord is a valid one, i.e., whether
+   * it represents meaningful metric values.
+   * 
+   * @return true if the EventRecord is a valid one, false otherwise.
+   */
+  public boolean isValid() {
+    return !("invalid".equalsIgnoreCase((String) fields.get("hostname")));
+  }
+
+  /**
+   * Creates and returns a string representation of the object.
+   * 
+   * @return a String representation of the object
+   */
+
+  public String toString() {
+    String retval = "";
+    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
+    Collections.sort(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      Object value = fields.get(keys.get(i));
+      if (value == null)
+        retval += keys.get(i) + ":\tnull\n";
+      else if (value instanceof String)
+        retval += keys.get(i) + ":\t" + value + "\n";
+      else if (value instanceof Calendar)
+        retval += keys.get(i) + ":\t" + ((Calendar) value).getTime() + "\n";
+      else if (value instanceof InetAddress[]) {
+        retval += "Known IPs:\t";
+        for (InetAddress ip : ((InetAddress[]) value))
+          retval += ip.getHostAddress() + " ";
+        retval += "\n";
+      } else if (value instanceof String[]) {
+        // ips may also be stored as plain strings (e.g. by the log parsers)
+        retval += "Known IPs:\t";
+        for (String ip : ((String[]) value))
+          retval += ip + " ";
+        retval += "\n";
+      } else {
+        retval += keys.get(i) + ":\t" + value.toString() + "\n";
+      }
+    }
+    return retval;
+  }
+
+}

+ 120 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Executor.java

@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**********************************************************
+ * This class executes monitoring jobs on all nodes of the
+ * cluster, on which we intend to gather failure metrics. 
+ * It is basically a thread that sleeps and periodically wakes
+ * up to execute monitoring jobs and ship all gathered data to 
+ * a "safe" location, which in most cases will be the HDFS 
+ * filesystem of the monitored cluster.
+ * 
+ **********************************************************/
+
+public class Executor implements Runnable {
+
+  public static final int DEFAULT_LOG_INTERVAL = 3600;
+
+  public static final int DEFAULT_POLL_INTERVAL = 360;
+
+  public static int MIN_INTERVAL = 5;
+
+  public static int instances = 0;
+
+  LocalStore lstore;
+
+  ArrayList<MonitorJob> monitors;
+  
+  int interval;
+
+  int upload_interval;
+  int upload_counter;
+  
+  /**
+   * Create an instance of the class and read the configuration
+   * file to determine the set of jobs that will be run and the 
+   * maximum interval for which the thread can sleep before it 
+   * wakes up to execute a monitoring job on the node.
+   * 
+   */ 
+
+  public Executor(Configuration conf) {
+    
+    Environment.prepare("conf/failmon.properties");
+    
+    String localTmpDir;
+    
+    if (conf == null) {
+      // running as a stand-alone application
+      localTmpDir = System.getProperty("java.io.tmpdir");
+      Environment.setProperty("local.tmp.dir", localTmpDir);
+    } else {
+      // running from within Hadoop
+      localTmpDir = conf.get("hadoop.tmp.dir");
+      String hadoopLogPath = System.getProperty("hadoop.log.dir") + "/" + System.getProperty("hadoop.log.file");
+      Environment.setProperty("hadoop.log.file", hadoopLogPath);
+      Environment.setProperty("local.tmp.dir", localTmpDir);
+    }
+    
+    monitors = Environment.getJobs();
+    interval = Environment.getInterval(monitors);
+    upload_interval = LocalStore.UPLOAD_INTERVAL;
+    lstore = new LocalStore();
+    
+    if (Environment.getProperty("local.upload.interval") != null) 
+     upload_interval = Integer.parseInt(Environment.getProperty("local.upload.interval"));
+
+    instances++;
+  }
+
+  public void run() {
+    upload_counter = upload_interval;
+
+    Environment.logInfo("Failmon Executor thread started successfully.");
+    while (true) {
+      try {
+        Thread.sleep(interval * 1000);
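+        // each job keeps a countdown in seconds; subtract the sleep interval and fire the job when its countdown reaches zero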
+        for (int i = 0; i < monitors.size(); i++) {
+          monitors.get(i).counter -= interval;
+          if (monitors.get(i).counter <= 0) {
+            monitors.get(i).reset();
+            Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
+            monitors.get(i).job.monitor(lstore);
+          }
+        }
+        upload_counter -= interval;
+        if (upload_counter <= 0) {
+          lstore.upload();
+          upload_counter = upload_interval;
+        }
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+    }
+  }
+
+  public void cleanup() {
+    instances--;   
+  }
+}

+ 154 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HDFSMerger.java

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedOutputStream;
+import java.io.InputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.FileNotFoundException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedOutputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FSDataInputStream;
+
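+/**********************************************************
+ * This class concatenates all FailMon data files found in the
+ * HDFS upload directory into a single merge-<timestamp>.dat
+ * file and deletes the original files afterwards.
+ **********************************************************/
+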
+public class HDFSMerger {
+
+  Configuration hadoopConf;
+  FileSystem hdfs;
+  
+  String hdfsDir;
+  
+  FileStatus [] inputFiles;
+
+  Path outputFilePath;
+  FSDataOutputStream outputFile;
+    
+  boolean compress;
+
+  FileWriter fw;
+
+  BufferedWriter writer;
+
+  public HDFSMerger() throws IOException {
+
+    String hadoopConfPath; 
+
+    if (Environment.getProperty("hadoop.conf.path") == null)
+      hadoopConfPath = "../../../conf";
+    else
+      hadoopConfPath = Environment.getProperty("hadoop.conf.path");
+
+    // Read the configuration for the Hadoop environment
+    hadoopConf = new Configuration();
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
+    
+    // determine the local output file name
+    if (Environment.getProperty("local.tmp.filename") == null)
+      Environment.setProperty("local.tmp.filename", "failmon.dat");
+    
+    // determine the upload location
+    hdfsDir = Environment.getProperty("hdfs.upload.dir");
+    if (hdfsDir == null)
+      hdfsDir = "/failmon";
+
+    hdfs = FileSystem.get(hadoopConf);
+    
+    Path hdfsDirPath = new Path(hadoopConf.get("fs.default.name") + hdfsDir);
+
+    try {
+      if (!hdfs.getFileStatus(hdfsDirPath).isDir()) {
+	Environment.logInfo("HDFSMerger: Not an HDFS directory: " + hdfsDirPath.toString());
+	System.exit(0);
+      }
+    } catch (FileNotFoundException e) {
+      Environment.logInfo("HDFSMerger: Directory not found: " + hdfsDirPath.toString());
+    }
+
+    inputFiles = hdfs.listStatus(hdfsDirPath);
+
+    outputFilePath = new Path(hdfsDirPath.toString() + "/" + "merge-"
+			  + Calendar.getInstance().getTimeInMillis() + ".dat");
+    outputFile = hdfs.create(outputFilePath);
+    
+    for (FileStatus fstatus : inputFiles) {
+      appendFile(fstatus.getPath());
+      hdfs.delete(fstatus.getPath(), true);
+    }
+
+    outputFile.close();
+
+    Environment.logInfo("HDFS file merging complete!");
+  }
+
+  private void appendFile (Path inputPath) throws IOException {
+    
+    FSDataInputStream anyInputFile = hdfs.open(inputPath);
+    InputStream inputFile;
+    byte buffer[] = new byte[4096];
+    
+    if (inputPath.toString().endsWith(LocalStore.COMPRESSION_SUFFIX)) {
+      // the file is compressed
+      inputFile = new ZipInputStream(anyInputFile);
+      ((ZipInputStream) inputFile).getNextEntry();
+    } else {
+      inputFile = anyInputFile;
+    }
+    
+    try {
+      int bytesRead = 0;
+      while ((bytesRead = inputFile.read(buffer)) > 0) {
+	outputFile.write(buffer, 0, bytesRead);
+      }
+    } catch (IOException e) {
+      Environment.logInfo("Error while copying file:" + inputPath.toString());
+    } finally {
+      inputFile.close();
+    }    
+  }
+
+  
+  public static void main(String [] args) {
+
+    Environment.prepare("./conf/failmon.properties");
+
+    try {
+      new HDFSMerger();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+  }
+}

+ 136 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/HadoopLogParser.java

@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * An object of this class parses a Hadoop log file to create
+ * appropriate EventRecords. The log file can either be the log 
+ * of a NameNode or JobTracker or DataNode or TaskTracker.
+ * 
+ **********************************************************/
+
+public class HadoopLogParser extends LogParser {
+
+  /**
+   * Create a new parser object and try to find the hostname
+   * of the node that generated the log
+   */
+  public HadoopLogParser(String fname) {
+    super(fname);
+    if ((dateformat = Environment.getProperty("log.hadoop.dateformat")) == null)
+      dateformat = "\\d{4}-\\d{2}-\\d{2}";
+    if ((timeformat = Environment.getProperty("log.hadoop.timeformat")) == null)
+      timeformat = "\\d{2}:\\d{2}:\\d{2}";
+    findHostname();
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  public EventRecord parseLine(String line) throws IOException {
+    EventRecord retval = null;
+
+    if (line != null) {
+      // process line
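+      // a matching entry typically looks like: "2008-06-10 14:07:23,845 INFO org.apache.hadoop.dfs.DataNode: <message>" (illustrative)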
+      String patternStr = "(" + dateformat + ")";
+      patternStr += "\\s+";
+      patternStr += "(" + timeformat + ")";
+      patternStr += ".{4}\\s(\\w*)\\s"; // for logLevel
+      patternStr += "\\s*([\\w+\\.?]+)"; // for source
+      patternStr += ":\\s+(.+)"; // for the message
+      Pattern pattern = Pattern.compile(patternStr);
+      Matcher matcher = pattern.matcher(line);
+
+      if (matcher.find(0) && matcher.groupCount() >= 5) {
+        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
+            matcher.group(2)),
+	    "HadoopLog",
+	    matcher.group(3), // loglevel
+            matcher.group(4), // source
+            matcher.group(5)); // message
+      } else {
+        retval = new EventRecord();
+      }
+    }
+
+    return retval;
+  }
+
+  /**
+   * Parse a date found in the Hadoop log.
+   * 
+   * @return a Calendar representing the date
+   */
+  protected Calendar parseDate(String strDate, String strTime) {
+    Calendar retval = Calendar.getInstance();
+    // set date
+    String[] fields = strDate.split("-");
+    retval.set(Calendar.YEAR, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MONTH, Integer.parseInt(fields[1]) - 1); // Calendar.MONTH is zero-based
+    retval.set(Calendar.DATE, Integer.parseInt(fields[2]));
+    // set time
+    fields = strTime.split(":");
+    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
+    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
+    return retval;
+  }
+
+  /**
+   * Attempt to determine the hostname of the node that created the
+   * log file. This information can be found in the STARTUP_MSG lines 
+   * of the Hadoop log, which are emitted when the node starts.
+   * 
+   */
+  private void findHostname() {
+    String startupInfo = Environment.runCommandGeneric(
+        "grep --max-count=1 STARTUP_MSG:\\s*host " + file.getName()).toString();
+    Pattern pattern = Pattern.compile("\\s+(\\w+/.+)\\s+");
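+    // the matching STARTUP_MSG line typically reads "STARTUP_MSG:   host = node01/10.0.0.1", giving the hostname and IP (illustrative)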
+    Matcher matcher = pattern.matcher(startupInfo);
+    if (matcher.find(0)) {
+      hostname = matcher.group(1).split("/")[0];
+      ips = new String[1];
+      ips[0] = matcher.group(1).split("/")[1];
+    }
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("Hadoop Log Parser for file: " + file.getName());
+  }
+
+}

+ 282 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LocalStore.java

@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.zip.CRC32;
+import java.util.zip.CheckedOutputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**********************************************************
+ * This class takes care of the temporary local storage of 
+ * gathered metrics before they get uploaded into HDFS. It writes 
+ * Serialized Records as lines in a temporary file and then 
+ * compresses and uploads it into HDFS.
+ * 
+ **********************************************************/
+
+public class LocalStore {
+
+  public final static char FIELD_SEPARATOR = '|';
+
+  public final static char RECORD_SEPARATOR = '\n';
+
+  public final static String COMPRESSION_SUFFIX = ".zip";
+
+  public final static int UPLOAD_INTERVAL = 600;
+
+  String filename;
+  String hdfsDir;
+
+  boolean compress;
+
+  FileWriter fw;
+
+  BufferedWriter writer;
+
+  /**
+   * Create an instance of the class and read the configuration
+   * file to determine some output parameters. Then, initialize the
+   * structures needed for the buffered I/O (so that small appends
+   * can be handled efficiently).
+   * 
+   */ 
+
+  public LocalStore() {
+    // determine the local output file name
+    if (Environment.getProperty("local.tmp.filename") == null)
+      Environment.setProperty("local.tmp.filename", "failmon.dat");
+    
+    // local.tmp.dir has been set by the Executor
+    if (Environment.getProperty("local.tmp.dir") == null)
+      Environment.setProperty("local.tmp.dir", System.getProperty("java.io.tmpdir"));
+    
+    filename = Environment.getProperty("local.tmp.dir") + "/" +
+      Environment.getProperty("local.tmp.filename");
+
+    // determine the upload location
+    hdfsDir = Environment.getProperty("hdfs.upload.dir");
+    if (hdfsDir == null)
+      hdfsDir = "/failmon";
+
+    // determine if compression is enabled
+    compress = true;
+    if ("false".equalsIgnoreCase(Environment
+        .getProperty("local.tmp.compression")))
+      compress = false;
+
+    try {
+      fw = new FileWriter(filename, true);
+      writer = new BufferedWriter(fw);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Insert an EventRecord to the local storage, after it
+   * gets serialized and anonymized.
+   * 
+   * @param er the EventRecord to be inserted
+   */ 
+  
+  public void insert(EventRecord er) {
+    SerializedRecord sr = new SerializedRecord(er);
+    try {
+      Anonymizer.anonymize(sr);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+    append(sr);
+  }
+
+  /**
+   * Insert an array of EventRecords to the local storage, after they
+   * get serialized and anonymized.
+   * 
+   * @param ers the array of EventRecords to be inserted
+   */
+  public void insert(EventRecord[] ers) {
+    for (EventRecord er : ers)
+      insert(er);
+  }
+
+  private void append(SerializedRecord sr) {
+    try {
+      writer.write(pack(sr).toString());
+      writer.write(RECORD_SEPARATOR);
+      // writer.flush();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Pack a SerializedRecord into a character buffer.
+   * <p>
+   * This method is deprecated.
+   * @param sr the SerializedRecord to be packed
+   * @return packed representation of the SerializedRecord
+   * @see #packConcurrent(SerializedRecord)
+   * @deprecated
+   */
+  public static StringBuffer pack(SerializedRecord sr) {
+    return new StringBuffer(packConcurrent(sr));
+  }
+
+  /**
+   * Pack a SerializedRecord into a character buffer.
+   *
+   * @param sr the SerializedRecord to be packed
+   * @return packed representation of the SerializedRecord
+   */
+  public static CharSequence packConcurrent(SerializedRecord sr) {
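+    // produces key:value pairs joined by FIELD_SEPARATOR, e.g. "hostname:node01|ips:10.0.0.1|...|message:<text>|" (illustrative values and order)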
+    StringBuilder sb = new StringBuilder();
+
+    ArrayList<String> keys = new ArrayList<String>(sr.fields.keySet());
+
+    if (sr.isValid())
+      SerializedRecord.arrangeKeys(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      String value = sr.fields.get(keys.get(i));
+      sb.append(keys.get(i) + ":" + value);
+      sb.append(FIELD_SEPARATOR);
+    }
+    return sb;
+  }
+  
+  /**
+   * Upload the local file store into HDFS, after
+   * compressing it. Then a new local file is created
+   * as a temporary record store.
+   * 
+   */
+  public void upload() {
+    try {
+      writer.flush();
+      if (compress)
+        zipCompress(filename);
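+      // the remote file name becomes failmon-<hostname or MD5(hostname)>-<epoch millis>, plus the compression suffix when zipped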
+      String remoteName = "failmon-";
+      if ("true".equalsIgnoreCase(Environment.getProperty("anonymizer.hash.hostnames")))
+        remoteName += Anonymizer.getMD5Hash(InetAddress.getLocalHost().getCanonicalHostName()) + "-";
+      else
+        remoteName += InetAddress.getLocalHost().getCanonicalHostName() + "-"; 
+      remoteName += Calendar.getInstance().getTimeInMillis();
+      if (compress)
+	copyToHDFS(filename + COMPRESSION_SUFFIX, hdfsDir + "/" + remoteName + COMPRESSION_SUFFIX);
+      else
+	copyToHDFS(filename, hdfsDir + "/" + remoteName);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    // delete and re-open
+    try {
+      fw.close();
+      fw = new FileWriter(filename);
+      writer = new BufferedWriter(fw);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+  
+  /**
+   * Compress a text file using the ZIP compressing algorithm.
+   * 
+   * @param filename the path to the file to be compressed
+   */
+  public static void zipCompress(String filename) throws IOException {
+    FileOutputStream fos = new FileOutputStream(filename + COMPRESSION_SUFFIX);
+    CheckedOutputStream csum = new CheckedOutputStream(fos, new CRC32());
+    ZipOutputStream out = new ZipOutputStream(new BufferedOutputStream(csum));
+    out.setComment("Failmon records.");
+
+    BufferedReader in = new BufferedReader(new FileReader(filename));
+    out.putNextEntry(new ZipEntry(new File(filename).getName()));
+    int c;
+    while ((c = in.read()) != -1)
+      out.write(c);
+    in.close();
+
+    out.finish();
+    out.close();
+  }
+
+  /**
+   * Copy a local file to HDFS
+   * 
+   * @param localFile the filename of the local file
+   * @param hdfsFile the HDFS filename to copy to
+   */
+  public static void copyToHDFS(String localFile, String hdfsFile) throws IOException {
+
+    String hadoopConfPath; 
+
+    if (Environment.getProperty("hadoop.conf.path") == null)
+      hadoopConfPath = "../../../conf";
+    else
+      hadoopConfPath = Environment.getProperty("hadoop.conf.path");
+
+    // Read the configuration for the Hadoop environment
+    Configuration hadoopConf = new Configuration();
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-default.xml"));
+    hadoopConf.addResource(new Path(hadoopConfPath + "/hadoop-site.xml"));
+
+    // System.out.println(hadoopConf.get("hadoop.tmp.dir"));
+    // System.out.println(hadoopConf.get("fs.default.name"));
+    FileSystem fs = FileSystem.get(hadoopConf);
+
+    // HadoopDFS deals with Path
+    Path inFile = new Path("file://" + localFile);
+    Path outFile = new Path(hadoopConf.get("fs.default.name") + hdfsFile);
+
+     // Read from and write to new file
+    Environment.logInfo("Uploading to HDFS (file " + outFile + ") ...");
+    fs.copyFromLocalFile(false, inFile, outFile);
+  }
+
+  /**
+   * Close the temporary local file
+   * 
+   */ 
+  public void close() {
+    try {
+      writer.flush();
+      writer.close();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+}
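
A minimal usage sketch of LocalStore, assuming failmon.properties and the Hadoop configuration are reachable as Environment.prepare() and upload() expect; the class name and record contents below are illustrative, not part of the patch:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class LocalStoreExample {
      public static void main(String[] args) throws Exception {
        Environment.prepare("conf/failmon.properties");
        LocalStore store = new LocalStore();

        // Same 7-argument EventRecord constructor the parsers in this patch use.
        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "manual", "hello failmon");

        store.insert(er);   // serialize, anonymize and append to the local file
        store.upload();     // zip-compress (if enabled) and copy the file to HDFS
        store.close();
      }
    }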

+ 214 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/LogParser.java

@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Calendar;
+
+/**********************************************************
+ * This class represents objects that provide log parsing 
+ * functionality. Typically, such objects read log files line
+ * by line and for each log entry they identify, they create a 
+ * corresponding EventRecord. In this way, disparate log files
+ * can be merged using the uniform format of EventRecords and can,
+ * thus, be processed in a uniform way.
+ * 
+ **********************************************************/
+
+public abstract class LogParser implements Monitored {
+
+  File file;
+
+  BufferedReader reader;
+
+  String hostname;
+
+  Object [] ips;
+
+  String dateformat;
+
+  String timeformat;
+
+  private String firstLine;
+  private long offset;
+
+  /**
+   * Create a parser that will read from the specified log file.
+   * 
+   * @param fname the filename of the log file to be read
+   */
+  public LogParser(String fname) {
+    file = new File(fname);
+
+    ParseState ps = PersistentState.getState(file.getAbsolutePath());
+    firstLine = ps.firstLine;
+    offset = ps.offset;
+    
+    try {
+      reader = new BufferedReader(new FileReader(file));
+      checkForRotation();
+      Environment.logInfo("Checked for rotation...");
+      reader.skip(offset);
+    } catch (FileNotFoundException e) {
+      System.err.println(e.getMessage());
+      e.printStackTrace();
+    } catch (IOException e) {
+      System.err.println(e.getMessage());
+      e.printStackTrace();
+    }
+
+    setNetworkProperties();
+  }
+
+  protected void setNetworkProperties() {
+    // determine hostname and ip addresses for the node
+    try {
+      // Get hostname
+      hostname = InetAddress.getLocalHost().getCanonicalHostName();
+      // Get all associated ip addresses
+      ips = InetAddress.getAllByName(hostname);
+
+    } catch (UnknownHostException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Insert all EventRecords that can be extracted from
+   * the monitored log file into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls) {
+    int in = 0;
+    EventRecord er = null;
+    Environment.logInfo("Started processing log...");
+
+    while ((er = getNext()) != null) {
+      // Environment.logInfo("Processing log line:\t" + in++);
+      if (er.isValid()) {
+        ls.insert(er);
+      }
+    }
+
+    PersistentState.updateState(file.getAbsolutePath(), firstLine, offset);
+    PersistentState.writeState("conf/parsing.state");
+  }
+
+  /**
+   * Get an array of all EventRecords that can be extracted from
+   * the monitored log file.
+   * 
+   * @return The array of EventRecords
+   */
+  public EventRecord[] monitor() {
+
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+    EventRecord er;
+
+    while ((er = getNext()) != null)
+      recs.add(er);
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+
+  /**
+   * Continue parsing the log file until a valid log entry is identified.
+   * When one such entry is found, parse it and return a corresponding EventRecord.
+   * 
+   *  
+   * @return The EventRecord corresponding to the next log entry
+   */
+  public EventRecord getNext() {
+    try {
+      String line = reader.readLine();
+      if (line != null) {
+        if (firstLine == null)
+          firstLine = new String(line);
+        offset += line.length() + 1;
+        return parseLine(line);
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * Return the BufferedReader that reads the log file
+   *  
+   * @return The BufferedReader that reads the log file
+   */
+  public BufferedReader getReader() {
+    return reader;
+  }
+
+  /**
+   * Check whether the log file has been rotated. If so,
+   * start reading the file from the beginning.
+   *  
+   */
+  public void checkForRotation() {
+    try {
+      BufferedReader probe = new BufferedReader(new FileReader(file.getAbsoluteFile()));
+      String probedFirstLine = probe.readLine();
+      probe.close();
+      if (firstLine == null || (!firstLine.equals(probedFirstLine))) {
+        // the file was rotated; start reading it from the beginning
+        reader.close();
+        reader = new BufferedReader(new FileReader(file.getAbsoluteFile()));
+        firstLine = null;
+        offset = 0;
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  abstract public EventRecord parseLine(String line) throws IOException;
+
+  /**
+   * Parse a date found in Hadoop log file.
+   * 
+   * @return a Calendar representing the date
+   */
+  abstract protected Calendar parseDate(String strDate, String strTime);
+
+}
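
A minimal sketch of a concrete LogParser subclass, showing the two abstract methods (parseLine, parseDate) plus getInfo() that an implementation must provide; the class name and parsing logic are illustrative, unlike the real HadoopLogParser and SystemLogParser in this patch:

    package org.apache.hadoop.contrib.failmon;

    import java.io.IOException;
    import java.util.Calendar;

    public class SingleLineParser extends LogParser {

      public SingleLineParser(String fname) {
        super(fname);
      }

      // Treat every non-empty line as one "Unknown" event; blank lines are invalid.
      public EventRecord parseLine(String line) throws IOException {
        if (line == null || line.trim().length() == 0)
          return new EventRecord();   // isValid() == false
        return new EventRecord(hostname, ips, Calendar.getInstance(),
            "ExampleLog", "Unknown", "Unknown", line);
      }

      protected Calendar parseDate(String strDate, String strTime) {
        return Calendar.getInstance();   // no real date parsing in this sketch
      }

      public String getInfo() {
        return "example parser for file: " + file.getAbsoluteFile();
      }
    }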

+ 43 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/MonitorJob.java

@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+/**********************************************************
+ * This class is a wrapper for a monitoring job. 
+ * 
+ **********************************************************/
+
+public class MonitorJob {
+  Monitored job;
+
+  String type;
+  int interval;
+  int counter;
+
+  public MonitorJob(Monitored _job, String _type, int _interval) {
+    job = _job;
+    type = _type;
+    interval = _interval;
+    counter = _interval;
+  }
+
+  public void reset() {
+    counter = interval;
+  }
+}

+ 53 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/Monitored.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+/**********************************************************
+ * Represents objects that monitor specific hardware resources and
+ * can query them to get EventRecords describing the state of these
+ * resources.
+ *
+ **********************************************************/
+
+public interface Monitored {
+  /**
+   * Get an array of all EventRecords that can be extracted for
+   * the represented hardware component.
+   * 
+   * @return The array of EventRecords
+   */
+  public EventRecord[] monitor();
+  
+  /**
+   * Inserts all EventRecords that can be extracted for
+   * the represented hardware component into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls);
+  
+  /**
+   * Return a String with information about the implementing
+   * class 
+   * 
+   * @return A String describing the implementing class
+   */
+  public String getInfo();
+}

+ 140 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/NICParser.java

@@ -0,0 +1,140 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Calendar;
+
+/**********************************************************
+ * Objects of this class parse the output of ifconfig to 
+ * gather information about present Network Interface Cards
+ * in the system. The list of NICs to poll is specified in the 
+ * configuration file.
+ * 
+ **********************************************************/
+
+
+public class NICParser extends ShellParser {
+
+  String[] nics;
+
+  /**
+   * Constructs a NICParser and reads the list of NICs to query
+   */
+  public NICParser() {
+    super();
+    nics = Environment.getProperty("nic.list").split(",\\s*");
+  }
+
+  /**
+   * Reads and parses the output of ifconfig for a specified NIC and 
+   * creates an appropriate EventRecord that holds the desirable 
+   * information for it.
+   * 
+   * @param device the NIC device name to query
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String device) throws UnknownHostException {
+    CharSequence sb = Environment.runCommandGeneric("/sbin/ifconfig " + device);
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "NIC", "Unknown", device, "-");
+
+    retval.set("hwAddress", findPattern("HWaddr\\s*([\\S{2}:]{17})", sb
+        .toString(), 1));
+
+    retval.set("ipAddress", findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb
+        .toString(), 1));
+
+    String tmp = findPattern("inet\\s+addr:\\s*([\\w.?]*)", sb.toString(), 1);
+    retval.set("status", (tmp == null) ? "DOWN" : "UP");
+    if (tmp != null)
+      retval.set("ipAddress", tmp);
+
+    retval.set("rxPackets", findPattern("RX\\s*packets\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxErrors", findPattern("RX.+errors\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxDropped", findPattern("RX.+dropped\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxOverruns", findPattern("RX.+overruns\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("rxFrame", findPattern("RX.+frame\\s*:\\s*(\\d+)",
+        sb.toString(), 1));
+
+    retval.set("txPackets", findPattern("TX\\s*packets\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txErrors", findPattern("TX.+errors\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txDropped", findPattern("TX.+dropped\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txOverruns", findPattern("TX.+overruns\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txCarrier", findPattern("TX.+carrier\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    retval.set("collisions", findPattern("\\s+collisions\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    retval.set("rxBytes", findPattern("RX\\s*bytes\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+    retval.set("txBytes", findPattern("TX\\s*bytes\\s*:\\s*(\\d+)", sb
+        .toString(), 1));
+
+    return retval;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors for 
+   * each one of the NICs specified in the configuration. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of network interfaces.
+   */
+  public EventRecord[] monitor() {
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+
+    for (String nic : nics) {
+      try {
+        recs.add(query(nic));
+      } catch (UnknownHostException e) {
+        e.printStackTrace();
+      }
+    }
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    String retval = "ifconfig parser for interfaces: ";
+    for (String nic : nics)
+      retval += nic + " ";
+    return retval;
+  }
+}

+ 132 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/OfflineAnonymizer.java

@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+
+/**********************************************************
+ * This class can be used to anonymize logs independently of
+ * Hadoop and the Executor. It parses the specified log file to
+ * create log records for it and then passes them to the Anonymizer.
+ * After they are anonymized, they are written to a local file,
+ * which is then compressed and stored locally.
+ * 
+ **********************************************************/
+
+public class OfflineAnonymizer {
+
+  public enum LogType {
+    HADOOP, SYSTEM
+  };
+
+  LogType logtype;
+
+  File logfile;
+
+  LogParser parser;
+
+  /**
+   * Creates an OfflineAnonymizer for a specific log file.
+   * 
+   * @param logtype the type of the log file. This can either be
+   * LogFile.HADOOP or LogFile.SYSTEM
+   * @param filename the path to the log file
+   * 
+   */  
+  public OfflineAnonymizer(LogType logtype, String filename) {
+
+    logfile = new File(filename);
+
+    if (!logfile.exists()) {
+      System.err.println("Input file does not exist!");
+      System.exit(0);
+    }
+
+    if (logtype == LogType.HADOOP)
+      parser = new HadoopLogParser(filename);
+    else
+      parser = new SystemLogParser(filename);
+  }
+
+  /**
+   * Performs anonymization for the log file. Log entries are
+   * read one by one and EventRecords are created, which are then
+   * anonymized and written to the output.
+   * 
+   */
+  public void anonymize() throws Exception {
+    EventRecord er = null;
+    SerializedRecord sr = null;
+
+    BufferedWriter bfw = new BufferedWriter(new FileWriter(logfile.getName()
+        + ".anonymized"));
+
+    System.out.println("Anonymizing log records...");
+    while ((er = parser.getNext()) != null) {
+      if (er.isValid()) {
+        sr = new SerializedRecord(er);
+        Anonymizer.anonymize(sr);
+        bfw.write(LocalStore.pack(sr).toString());
+        bfw.write(LocalStore.RECORD_SEPARATOR);
+      }
+    }
+    bfw.flush();
+    bfw.close();
+    System.out.println("Anonymized log records written to " + logfile.getName()
+        + ".anonymized");
+
+    System.out.println("Compressing output file...");
+    LocalStore.zipCompress(logfile.getName() + ".anonymized");
+    System.out.println("Compressed output file written to " + logfile.getName()
+        + ".anonymized" + LocalStore.COMPRESSION_SUFFIX);
+  }
+
+  public static void main(String[] args) {
+
+    if (args.length < 2) {
+      System.out.println("Usage: OfflineAnonymizer <log_type> <filename>");
+      System.out
+          .println("where <log_type> is either \"-hadoop\" or \"-system\" and <filename> is the path to the log file");
+      System.exit(0);
+    }
+
+    LogType logtype = null;
+
+    if (args[0].equalsIgnoreCase("-hadoop"))
+      logtype = LogType.HADOOP;
+    else if (args[0].equalsIgnoreCase("-system"))
+      logtype = LogType.SYSTEM;
+    else {
+      System.err.println("Invalid first argument.");
+      System.exit(0);
+    }
+
+    OfflineAnonymizer oa = new OfflineAnonymizer(logtype, args[1]);
+
+    try {
+      oa.anonymize();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return;
+  }
+}
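
For reference, a typical invocation (classpath and log path are illustrative) is "java org.apache.hadoop.contrib.failmon.OfflineAnonymizer -system /var/log/messages", which writes messages.anonymized in the working directory and then a compressed copy of it carrying the COMPRESSION_SUFFIX extension defined in LocalStore.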

+ 163 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/PersistentState.java

@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.Properties;
+import java.util.Calendar;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+/**********************************************************
+ * This class takes care of the information that needs to be
+ * persistently stored locally on nodes. Bookkeeping is done for the
+ * state of parsing of log files, so that the portion of the file that
+ * has already been parsed in previous calls will not be parsed again.
+ * For each log file, we maintain the byte offset of the last
+ * character parsed in previous passes. Also, the first entry in the
+ * log file is stored, so that FailMon can determine when a log file
+ * has been rotated (and thus parsing needs to start from the
+ * beginning of the file). We use a property file to store that
+ * information. For each log file we create a property keyed by the
+ * filename, the value of which contains the byte offset and first log
+ * entry separated by a SEPARATOR.
+ * 
+ **********************************************************/
+
+public class PersistentState {
+
+  private final static String SEPARATOR = "###";
+  
+  static String filename;
+  static Properties persData = new Properties();
+  
+  /**
+   * Read the state of parsing for all open log files from a property
+   * file.
+   * 
+   * @param fname the filename of the property file to be read
+   */
+
+  public static void readState(String fname) {
+
+    filename = fname;
+    
+    try {
+      persData.load(new FileInputStream(filename));
+    } catch (FileNotFoundException e1) {
+      // ignore
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+   /**
+   * Read and return the state of parsing for a particular log file.
+   * 
+   * @param fname the log file for which to read the state
+   */
+  public static ParseState getState(String fname) {
+    String [] fields = persData.getProperty(fname, "null" + SEPARATOR + "0").split(SEPARATOR, 2);
+    String firstLine;
+    long offset;
+    
+    if (fields.length < 2) {
+      System.err.println("Malformed persistent state data found");
+      Environment.logInfo("Malformed persistent state data found");
+      firstLine = null;
+      offset = 0;
+    } else {
+      firstLine = (fields[0].equals("null") ? null : fields[0]);
+      offset = Long.parseLong(fields[1]);
+    }
+
+    return new ParseState(fname, firstLine, offset);
+  }
+
+  /**
+   * Set the state of parsing for a particular log file.
+   * 
+   * @param state the ParseState to set
+   */
+  public static void setState(ParseState state) {
+
+    if (state == null) {
+      System.err.println("Null state found");
+      Environment.logInfo("Null state found");
+      return;
+    }
+
+    persData.setProperty(state.filename, state.firstLine + SEPARATOR + state.offset);
+  }
+
+  /**
+   * Update the state of parsing for a particular log file.
+   * 
+   * @param filename the log file for which to update the state
+   * @param firstLine the current first line of the log file
+   * @param offset the byte offset of the last character parsed
+   */ 
+  public static void updateState(String filename, String firstLine, long offset) {
+
+    ParseState ps = getState(filename);
+
+    if (firstLine != null)
+      ps.firstLine = firstLine;
+
+    ps.offset = offset;
+
+    setState(ps);
+  }
+
+  /**
+   * Write the state of parsing for all open log files to a property
+   * file on disk.
+   * 
+   * @param fname the filename of the property file to write to
+   */
+  public static void writeState(String fname) {
+    try {
+      persData.store(new FileOutputStream(fname), Calendar.getInstance().getTime().toString());
+    } catch (FileNotFoundException e1) {
+      e1.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+  
+}
+
+/**********************************************************
+ * This class represents the state of parsing for a particular log
+ * file.
+ * 
+ **********************************************************/
+
+class ParseState {
+
+  public String filename;
+  public String firstLine;
+  public long offset;
+
+  public ParseState(String _filename, String _firstLine, long _offset) {
+    this.filename = _filename;
+    this.firstLine = _firstLine;
+    this.offset = _offset;
+  }
+}
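
A short sketch of how the parsing state round-trips through PersistentState; the log path, sample first line, and offset are illustrative, and "conf/parsing.state" mirrors the path used by LogParser.monitor() in this patch:

    package org.apache.hadoop.contrib.failmon;

    public class ParsingStateExample {
      public static void main(String[] args) {
        PersistentState.readState("conf/parsing.state");

        // Where did parsing of /var/log/messages stop last time?
        // (firstLine == null and offset == 0 if the file was never parsed)
        ParseState ps = PersistentState.getState("/var/log/messages");
        System.out.println("resume at byte offset " + ps.offset);

        // Record that parsing reached byte 2048, remembering the first log line
        // so that a later run can detect rotation, then flush to disk.
        PersistentState.updateState("/var/log/messages",
            "Jan  1 00:00:01 node01 syslogd 1.4.1: restart.", 2048L);
        PersistentState.writeState("conf/parsing.state");
      }
    }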

+ 120 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/RunOnce.java

@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.ArrayList;
+
+/**********************************************************
+* Runs a set of monitoring jobs once for the local node. The set of
+* jobs to be run is the intersection of the jobs specifed in the
+* configuration file and the set of jobs specified in the --only
+* command line argument.
+ **********************************************************/ 
+
+public class RunOnce {
+
+  LocalStore lstore;
+
+  ArrayList<MonitorJob> monitors;
+  
+  boolean uploading = true;
+  
+  public RunOnce(String confFile) {
+    
+    Environment.prepare(confFile);
+    
+    String localTmpDir;
+    
+    // running as a stand-alone application
+    localTmpDir = System.getProperty("java.io.tmpdir");
+    Environment.setProperty("local.tmp.dir", localTmpDir);
+        
+    monitors = Environment.getJobs();
+    lstore = new LocalStore();
+    uploading  = true;
+  }
+
+  private void filter (String [] ftypes) {
+    ArrayList<MonitorJob> filtered = new ArrayList<MonitorJob>();
+    boolean found;
+    
+    // filter out unwanted monitor jobs
+    for (MonitorJob job : monitors) {
+      found = false;
+      for (String ftype : ftypes)
+        if (job.type.equalsIgnoreCase(ftype))
+          found = true;
+      if (found)
+        filtered.add(job);
+    }
+
+    // disable uploading if not requested
+    found = false;
+    for (String ftype : ftypes)
+      if (ftype.equalsIgnoreCase("upload"))
+        found = true;
+
+    if (!found)
+      uploading = false;
+    
+    monitors = filtered;
+  }
+  
+  private void run() {
+    
+    Environment.logInfo("Failmon started successfully.");
+
+    for (int i = 0; i < monitors.size(); i++) {
+      Environment.logInfo("Calling " + monitors.get(i).job.getInfo() + "...\t");
+      monitors.get(i).job.monitor(lstore);
+    }
+
+    if (uploading)
+      lstore.upload();
+
+    lstore.close();
+  }
+
+  public void cleanup() {
+    // nothing to be done
+  }
+
+  
+  public static void main (String [] args) {
+
+    String configFilePath = "./conf/failmon.properties";
+    String [] onlyList = null;
+    
+    // Parse command-line parameters
+    for (int i = 0; i < args.length - 1; i++) {
+      if (args[i].equalsIgnoreCase("--config"))
+        configFilePath = args[i + 1];
+      else if (args[i].equalsIgnoreCase("--only"))
+        onlyList = args[i + 1].split(",");
+    }
+
+    RunOnce ro = new RunOnce(configFilePath);
+    // only keep the requested types of jobs
+    if (onlyList != null)
+      ro.filter(onlyList);
+    // run once only
+    ro.run();
+  }
+
+}
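
For reference, a typical stand-alone invocation (classpath and job type names are illustrative) is "java org.apache.hadoop.contrib.failmon.RunOnce --config ./conf/failmon.properties --only sensors,upload": only monitor jobs whose configured type matches an entry of --only are run, and the local store is uploaded to HDFS only if "upload" appears in that list. Without --only, all configured jobs run and the upload is performed.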

+ 206 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SMARTParser.java

@@ -0,0 +1,206 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of smartmontools to 
+ * gather information about the state of disks in the system. The
+ * smartmontools utility reads the S.M.A.R.T. attributes from
+ * the disk devices and reports them to the user. Note that since
+ * running smartctl requires superuser privileges, one should
+ * grant sudo privileges to the running user for the command smartctl
+ * (without a password). Alternatively, one can set up a cron job that 
+ * periodically dumps the output of smartctl into a user-readable file.
+ * See the configuration file for details.
+ *
+ **********************************************************/
+
+public class SMARTParser extends ShellParser {
+
+  String[] devices;
+
+  /**
+   * Constructs a SMARTParser and reads the list of disk 
+   * devices to query
+   */
+  public SMARTParser() {
+    super();
+    String devicesStr = Environment.getProperty("disks.list");
+    if (devicesStr != null)
+      devices = devicesStr.split(",\\s*");
+  }
+
+  /**
+   * Reads and parses the output of smartctl for a specified disk and 
+   * creates an appropriate EventRecord that holds the desirable 
+   * information for it. Since the output of smartctl is different for 
+   * different kinds of disks, we try to identify as many attributes as 
+ * possible for all known output formats. 
+   * 
+   * @param device the disk device name to query
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String device) throws Exception {
+    String conf = Environment.getProperty("disks." + device + ".source");
+    CharSequence sb;
+
+    if (conf == null)
+      sb = Environment.runCommandGeneric("sudo smartctl --all " + device);
+    else
+      sb = Environment.runCommandGeneric("cat " + conf);
+
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "SMART", "Unknown",
+        (conf == null ? "sudo smartctl --all " + device : "file " + conf), "-");
+    // IBM SCSI disks
+    retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("current_temperature", findPattern(
+        "Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("trip_temperature", findPattern(
+        "Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1));
+    retval.set("start_stop_count", findPattern(
+        "start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1));
+
+    String[] var = { "read", "write", "verify" };
+    for (String s : var) {
+      retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb
+          .toString(), 1));
+      retval.set(s + "_ecc_delayed", findPattern(s
+          + "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2));
+      retval.set(s + "_rereads", findPattern(
+          s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2));
+      retval.set(s + "_GBs", findPattern(s
+          + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2));
+      retval.set(s + "_uncorrected",
+          findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb
+              .toString(), 3));
+    }
+
+    // Hitachi IDE, SATA
+    retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb
+        .toString(), 1));
+    retval.set("status", "PASSED".equalsIgnoreCase(findPattern(
+        "test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? "OK" : "FAILED");
+
+    readColumns(retval, sb);
+
+    return retval;
+  }
+
+  /**
+   * Reads attributes in the following format:
+   * 
+   * ID# ATTRIBUTE_NAME          FLAG     VALUE WORST THRESH TYPE      UPDATED  WHEN_FAILED RAW_VALUE
+   * 3 Spin_Up_Time             0x0027   180   177   063    Pre-fail  Always       -       10265
+   * 4 Start_Stop_Count         0x0032   253   253   000    Old_age   Always       -       34
+   * 5 Reallocated_Sector_Ct    0x0033   253   253   063    Pre-fail  Always       -       0
+   * 6 Read_Channel_Margin      0x0001   253   253   100    Pre-fail  Offline      -       0
+   * 7 Seek_Error_Rate          0x000a   253   252   000    Old_age   Always       -       0
+   * 8 Seek_Time_Performance    0x0027   250   224   187    Pre-fail  Always       -       53894
+   * 9 Power_On_Minutes         0x0032   210   210   000    Old_age   Always       -       878h+00m
+   * 10 Spin_Retry_Count        0x002b   253   252   157    Pre-fail  Always       -       0
+   * 11 Calibration_Retry_Count 0x002b   253   252   223    Pre-fail  Always       -       0
+   * 12 Power_Cycle_Count       0x0032   253   253   000    Old_age   Always       -       49
+   * 192 PowerOff_Retract_Count 0x0032   253   253   000    Old_age   Always       -       0
+   * 193 Load_Cycle_Count       0x0032   253   253   000    Old_age   Always       -       0
+   * 194 Temperature_Celsius    0x0032   037   253   000    Old_age   Always       -       37
+   * 195 Hardware_ECC_Recovered 0x000a   253   252   000    Old_age   Always       -       2645
+   * 
+   * This format is mostly found in IDE and SATA disks.
+   * 
+   * @param er the EventRecord in which to store attributes found
+   * @param sb the text to parse
+   * 
+   * @return the EventRecord in which new attributes are stored.
+   */
+  private EventRecord readColumns(EventRecord er, CharSequence sb) {
+
+    Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$",
+        Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(sb);
+
+    while (matcher.find()) {
+      String[] tokens = matcher.group(1).split("\\s+");
+      boolean failed = false;
+      // check if this attribute is a failed one
+      if (!tokens[8].equals("-"))
+        failed = true;
+      er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]);
+    }
+
+    return er;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors for 
+   * each one of the disks specified in the configuration. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of the disk devices.
+   */
+  public EventRecord[] monitor() {
+    ArrayList<EventRecord> recs = new ArrayList<EventRecord>();
+
+    for (String device : devices) {
+      try {
+        recs.add(query(device));
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
+    }
+
+    EventRecord[] T = new EventRecord[recs.size()];
+
+    return recs.toArray(T);
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    String retval = "S.M.A.R.T. disk attributes parser for disks ";
+    for (String device : devices)
+      retval += device + " ";
+    return retval;
+  }
+
+}

+ 112 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SensorsParser.java

@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of the lm-sensors utility 
+ * to gather information about fan speed, temperatures for cpus
+ * and motherboard etc.
+ *
+ **********************************************************/
+
+public class SensorsParser extends ShellParser {
+
+  /**
+   * Reads and parses the output of the 'sensors' command 
+   * and creates an appropriate EventRecord that holds 
+   * the desirable information.
+   * 
+   * @param s unused parameter
+   * 
+   * @return the EventRecord created
+   */
+  public EventRecord query(String s) throws Exception {
+    CharSequence sb;
+
+    sb = Environment.runCommandGeneric("sensors -A");
+
+    EventRecord retval = new EventRecord(InetAddress.getLocalHost()
+        .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost()
+        .getHostName()), Calendar.getInstance(), "lm-sensors", "Unknown",
+        "sensors -A", "-");
+    readGroup(retval, sb, "fan");
+    readGroup(retval, sb, "in");
+    readGroup(retval, sb, "temp");
+    readGroup(retval, sb, "Core");
+
+    return retval;
+  }
+
+  /**
+   * Reads and parses lines that provide the output
+   * of a group of sensors with the same functionality.
+   * 
+   * @param er the EventRecord to which the new attributes are added
+   * @param sb the text to parse
+   * @param prefix a String prefix specifying the common prefix of the
+   * sensors' names in the group (e.g. "fan", "in", "temp")
+   * 
+   * @return the EventRecord created
+   */
+  private EventRecord readGroup(EventRecord er, CharSequence sb, String prefix) {
+
+    Pattern pattern = Pattern.compile(".*(" + prefix
+        + "\\s*\\d*)\\s*:\\s*(\\+?\\d+)", Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(sb);
+
+    while (matcher.find())
+      er.set(matcher.group(1), matcher.group(2));
+
+    return er;
+  }
+
+  /**
+   * Invokes query() to do the parsing and handles parsing errors. 
+   * 
+   * @return an array of EventRecords that holds one element that represents
+   * the current state of the hardware sensors
+   */
+  public EventRecord[] monitor() {
+    EventRecord[] recs = new EventRecord[1];
+
+    try {
+      recs[0] = query(null);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    return recs;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("lm-sensors parser");
+  }
+
+}

+ 163 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SerializedRecord.java

@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.HashMap;
+import java.text.DateFormat;
+
+/**********************************************************
+ * Objects of this class hold the serialized representations
+ * of EventRecords. A SerializedRecord is essentially an EventRecord
+ * with all its property values converted to strings. It also provides 
+ * some convenience methods for printing the property fields in a 
+ * more readable way.
+ *
+ **********************************************************/
+
+public class SerializedRecord {
+
+  HashMap<String, String> fields;
+  private static DateFormat dateFormatter =
+    DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.LONG);
+
+  /**
+   * Create the SerializedRecord given an EventRecord.
+   */
+  
+  public SerializedRecord(EventRecord source) {
+    fields = new HashMap<String, String>();
+    fields.clear();
+
+    for (String k : source.getMap().keySet()) {
+      ArrayList<String> strs = getStrings(source.getMap().get(k));
+      if (strs.size() == 1)
+        fields.put(k, strs.get(0));
+      else
+        for (int i = 0; i < strs.size(); i++)
+          fields.put(k + "#" + i, strs.get(i));
+    }
+
+  }
+
+  /**
+   * Extract String representations from an Object.
+   * 
+   * @param o the input object
+   * 
+   * @return an ArrayList that contains Strings found in o
+   */
+  private ArrayList<String> getStrings(Object o) {
+    ArrayList<String> retval = new ArrayList<String>();
+    retval.clear();
+    if (o == null)
+      retval.add("null");
+    else if (o instanceof String)
+      retval.add((String) o);
+    else if (o instanceof Calendar)
+      retval.add(dateFormatter.format(((Calendar) o).getTime()));
+    else if (o instanceof InetAddress[])
+      for (InetAddress ip : ((InetAddress[]) o))
+        retval.add(ip.getHostAddress());
+    else if (o instanceof String[])
+      for (String s : (String []) o)
+        retval.add(s);
+    else
+      retval.add(o.toString());
+
+    return retval;
+  }
+
+  /**
+   * Set the value of a property of the EventRecord.
+   * 
+   * @param fieldName the name of the property to set
+   * @param fieldValue the value of the property to set
+   * 
+   */
+  public void set(String fieldName, String fieldValue) {
+    fields.put(fieldName, fieldValue);
+  }
+
+  /**
+   * Get the value of a property of the EventRecord.
+   * If the property with the specific key is not found,
+   * null is returned.
+   * 
+   * @param fieldName the name of the property to get.
+   */
+  public String get(String fieldName) {
+    return fields.get(fieldName);
+  }
+
+  /**
+   * Arrange the keys to provide a more readable printing order:
+   * first goes the timestamp, then the hostname and then the type, followed
+   * by all other keys found.
+   * 
+   * @param keys The input ArrayList of keys to re-arrange.
+   */
+  public static void arrangeKeys(ArrayList<String> keys) {
+    move(keys, "timestamp", 0);
+    move(keys, "hostname", 1);
+    move(keys, "type", 2);
+  }
+
+  private static void move(ArrayList<String> keys, String key, int position) {
+    int cur = keys.indexOf(key);
+    if (cur == -1)
+      return;
+    keys.set(cur, keys.get(position));
+    keys.set(position, key);
+  }
+
+  /**
+   * Check if the SerializedRecord is a valid one, i.e., whether
+   * it represents meaningful metric values.
+   * 
+   * @return true if the EventRecord is a valid one, false otherwise.
+   */
+  public boolean isValid() {
+    return !("invalid".equalsIgnoreCase(fields.get("hostname")));
+  }
+
+  
+  /**
+   * Creates and returns a string representation of the object
+   * 
+   * @return a String representing the object
+   */
+
+  public String toString() {
+    String retval = "";
+    ArrayList<String> keys = new ArrayList<String>(fields.keySet());
+    arrangeKeys(keys);
+
+    for (int i = 0; i < keys.size(); i++) {
+      String value = fields.get(keys.get(i));
+      if (value == null)
+        retval += keys.get(i) + ":\tnull\n";
+      else
+        retval += keys.get(i) + ":\t" + value + "\n";
+    }
+    return retval;
+  }
+}
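
A small sketch tying SerializedRecord to LocalStore's packing: it converts an EventRecord to its string form, then prints both the readable representation and the compact key:value form. The class name and record contents are illustrative:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class SerializedRecordExample {
      public static void main(String[] args) throws Exception {
        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "manual", "disk temperature high");

        SerializedRecord sr = new SerializedRecord(er);

        // toString() prints one field per line, with timestamp, hostname and type first.
        System.out.println(sr);

        // packConcurrent() yields the compact key:value form appended by LocalStore.
        System.out.println(LocalStore.packConcurrent(sr));
      }
    }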

+ 102 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/ShellParser.java

@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * Objects of this class parse the output of system command-line
+ * utilities that can give information about the state of  
+ * various hardware components in the system. Typically, each such
+ * object either invokes a command and reads its output or reads the 
+ * output of one such command from a file on the disk. Currently 
+ * supported utilities include ifconfig, smartmontools, lm-sensors,
+ * /proc/cpuinfo.
+ *
+ **********************************************************/
+
+public abstract class ShellParser implements Monitored {
+
+  /**
+   * Find the first occurrence of a pattern in a piece of text 
+   * and return a specific group.
+   * 
+   *  @param strPattern the regular expression to match
+   *  @param text the text to search
+   *  @param grp the number of the matching group to return
+   *  
+   *  @return a String containing the matched group of the regular expression
+   */
+  protected String findPattern(String strPattern, String text, int grp) {
+
+    Pattern pattern = Pattern.compile(strPattern, Pattern.MULTILINE);
+    Matcher matcher = pattern.matcher(text);
+
+    if (matcher.find(0))
+      return matcher.group(grp);
+
+    return null;
+  }
+
+  /**
+   * Finds all occurrences of a pattern in a piece of text and returns 
+   * the matching groups.
+   * 
+   *  @param strPattern the regular expression to match
+   *  @param text the text to search
+   *  @param grp the number of the matching group to return
+   *  @param separator the string that separates occurrences in the returned value
+   *  
+   *  @return a String that contains all occurrences of strPattern in text, 
+   *  separated by separator
+   */
+  protected String findAll(String strPattern, String text, int grp,
+      String separator) {
+
+    String retval = "";
+    boolean firstTime = true;
+
+    Pattern pattern = Pattern.compile(strPattern);
+    Matcher matcher = pattern.matcher(text);
+
+    while (matcher.find()) {
+      retval += (firstTime ? "" : separator) + matcher.group(grp);
+      firstTime = false;
+    }
+
+    return retval;
+  }
+
+  /**
+   * Insert all EventRecords that can be extracted for
+   * the represented hardware component into a LocalStore.
+   * 
+   * @param ls the LocalStore into which the EventRecords 
+   * are to be stored.
+   */
+  public void monitor(LocalStore ls) {
+    ls.insert(monitor());
+  }
+
+  abstract public EventRecord[] monitor();
+
+  abstract public EventRecord query(String s) throws Exception;
+
+}
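
A throwaway ShellParser subclass (illustrative only) showing what the findPattern() and findAll() helpers return when run against ifconfig-style sample text:

    package org.apache.hadoop.contrib.failmon;

    import java.net.InetAddress;
    import java.util.Calendar;

    public class ShellParserExample extends ShellParser {

      public EventRecord query(String s) throws Exception {
        String sample = "eth0  inet addr:192.168.1.10\n"
            + "      RX packets:12345 errors:0\n";

        EventRecord er = new EventRecord(
            InetAddress.getLocalHost().getCanonicalHostName(),
            InetAddress.getAllByName(InetAddress.getLocalHost().getHostName()),
            Calendar.getInstance(), "Example", "Unknown", "none", "-");

        // First match of group 1: "192.168.1.10"
        er.set("ipAddress", findPattern("inet\\s+addr:\\s*([\\w.?]*)", sample, 1));
        // All matches of group 1 joined with ",": here just "12345"
        er.set("rxPackets", findAll("RX\\s*packets\\s*:\\s*(\\d+)", sample, 1, ","));
        return er;
      }

      public EventRecord[] monitor() {
        try {
          return new EventRecord[] { query(null) };
        } catch (Exception e) {
          e.printStackTrace();
          return new EventRecord[0];
        }
      }

      public String getInfo() {
        return "example shell parser";
      }
    }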

+ 126 - 0
common/src/contrib/failmon/src/java/org/apache/hadoop/contrib/failmon/SystemLogParser.java

@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.failmon;
+
+import java.io.IOException;
+import java.util.Calendar;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**********************************************************
+ * An object of this class parses a Unix system log file to create
+ * appropriate EventRecords. Currently, only the syslogd logging 
+ * daemon is supported.
+ * 
+ **********************************************************/
+
+public class SystemLogParser extends LogParser {
+
+  static String[] months = { "January", "February", "March", "April", "May",
+      "June", "July", "August", "September", "October", "November", "December" };
+  /**
+   * Create a new parser object .
+   */  
+  public SystemLogParser(String fname) {
+    super(fname);
+    if ((dateformat = Environment.getProperty("log.system.dateformat")) == null)
+      dateformat = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\\s+(\\d+)";
+    if ((timeformat = Environment.getProperty("log.system.timeformat")) == null)
+      timeformat = "\\d{2}:\\d{2}:\\d{2}";
+  }
+
+  /**
+   * Parses one line of the log. If the line contains a valid 
+   * log entry, then an appropriate EventRecord is returned, after all
+   * relevant fields have been parsed.
+   *
+   *  @param line the log line to be parsed
+   *    
+   *  @return the EventRecord representing the log entry of the line. If 
+   *  the line does not contain a valid log entry, then the EventRecord 
+   *  returned has isValid() = false. When the end-of-file has been reached,
+   *  null is returned to the caller.
+   */
+  public EventRecord parseLine(String line) throws IOException {
+
+    EventRecord retval = null;
+
+    if (line != null) {
+      // process line
+      String patternStr = "(" + dateformat + ")";
+      patternStr += "\\s+";
+      patternStr += "(" + timeformat + ")";
+      patternStr += "\\s+(\\S*)\\s"; // for hostname
+//      patternStr += "\\s*([\\w+\\.?]+)"; // for source
+      patternStr += ":?\\s*(.+)"; // for the message
+      Pattern pattern = Pattern.compile(patternStr);
+      Matcher matcher = pattern.matcher(line);
+      if (matcher.find() && matcher.groupCount() >= 0) {
+        retval = new EventRecord(hostname, ips, parseDate(matcher.group(1),
+            matcher.group(4)), "SystemLog", "Unknown", // loglevel
+            "Unknown", // source
+            matcher.group(6)); // message
+      } else {
+        retval = new EventRecord();
+      }
+    }
+
+    return retval;
+  }
+
+  /**
+   * Parse a date found in the system log.
+   * 
+   * @return a Calendar representing the date
+   */
+  protected Calendar parseDate(String strDate, String strTime) {
+    Calendar retval = Calendar.getInstance();
+    // set date
+    String[] fields = strDate.split("\\s+");
+    retval.set(Calendar.MONTH, parseMonth(fields[0]));
+    retval.set(Calendar.DATE, Integer.parseInt(fields[1]));
+    // set time
+    fields = strTime.split(":");
+    retval.set(Calendar.HOUR_OF_DAY, Integer.parseInt(fields[0]));
+    retval.set(Calendar.MINUTE, Integer.parseInt(fields[1]));
+    retval.set(Calendar.SECOND, Integer.parseInt(fields[2]));
+    return retval;
+  }
+
+  /**
+   * Convert the name of a month to the corresponding int value.
+   * 
+   * @return the int representation of the month.
+   */
+  private int parseMonth(String month) {
+    for (int i = 0; i < months.length; i++)
+      if (months[i].startsWith(month))
+        return i;
+    return -1;
+  }
+  
+  /**
+   * Return a String with information about this class
+   * 
+   * @return A String describing this class
+   */
+  public String getInfo() {
+    return ("System Log Parser for file : " + file.getAbsoluteFile());
+  }
+}
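
A short sketch of driving SystemLogParser end to end; the paths are illustrative, and Environment.prepare() must point at a valid failmon.properties for the parser and LocalStore to pick up their settings:

    package org.apache.hadoop.contrib.failmon;

    public class SyslogParseExample {
      public static void main(String[] args) {
        Environment.prepare("conf/failmon.properties");
        PersistentState.readState("conf/parsing.state");

        LogParser parser = new SystemLogParser("/var/log/messages");
        LocalStore store = new LocalStore();

        // Resumes from the last recorded offset, creates one EventRecord per
        // valid syslog entry (e.g. "Jan  1 00:00:01 node01 kernel: ...") and
        // stores it, then updates conf/parsing.state.
        parser.monitor(store);
        store.close();
      }
    }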

+ 272 - 0
common/src/contrib/hod/CHANGES.txt

@@ -0,0 +1,272 @@
+HOD Change Log
+
+Trunk (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-5022. Provide an option to remove all log files older
+    than the configured time via logcondense.
+    (Peeyush Bishnoi via yhemanth)
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-2898. Provide an option to specify a port range for
+    Hadoop services provisioned by HOD.
+    (Peeyush Bishnoi via yhemanth)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-5113. Fixed logcondense to remove files for usernames
+    beginning with characters specified in the -l option.
+    (Peeyush Bishnoi via yhemanth)
+
+Release 0.20.0 - (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-4705. Grant read permissions for files/directories
+    created by HOD. (Peeyush Bishnoi via yhemanth)
+
+    HADOOP-4937. Include ringmaster RPC port in the notes
+    attribute. (Peeyush Bishnoi via yhemanth)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-4782. Revert umask changes in HADOOP-4705 so that
+    files are still securely created. (Peeyush Bishnoi via
+    yhemanth)
+
+Release 0.19.0 - 2008-11-18
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+    HADOOP-3695. Provide an ability to start multiple workers per node.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-3959. Pass options specified in resource_manager.options to
+    job submission. 
+    (Craig Macdonald and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3814. Remove generation of dfs.client.buffer.dir for the generated
+    hadoop-site.xml. (Vinod Kumar Vavilapalli via acmurthy)
+
+Release 0.18.2 - Unreleased 
+
+  BUG FIXES
+
+    HADOOP-3786. Use HDFS instead of DFS in all docs and hyperlink to Torque.
+    (Vinod Kumar Vavilapalli via acmurthy)
+
+Release 0.18.1 - 2008-09-17
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4060. Modified HOD to rotate log files on the client side.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+  IMPROVEMENTS
+
+    HADOOP-4145. Add an accounting plugin (script) for HOD.
+    (Hemanth Yamijala via nigel)
+
+  BUG FIXES
+
+    HADOOP-4161. Fixed bug in HOD cleanup that had the potential to
+    hang clients. (Vinod Kumar Vavilapalli via nigel)
+
+Release 0.18.0 - 2008-08-19
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3483. Modified HOD to create a cluster directory if one does not
+    exist and to auto-deallocate a cluster while reallocating it, if it is
+    already dead. (Hemanth Yamijala via mukund)
+
+    HADOOP-3184. Modified HOD to handle master failures on bad nodes by trying 
+    to bring them up on another node in the ring. (Hemanth Yamijala via ddas)
+
+    HADOOP-3610. Modified HOD to create cluster directory if one does not
+    exist when using the script option. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3808. Modified HOD to include RPC port of the JobTracker
+    into the notes attribute of the resource manager. (yhemanth)
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-3376: Provide a mechanism to detect and handle violations to 
+    resource manager limits. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3151. Improves error messages when reporting failures due to 
+    incorrect parameters passed to HOD. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on
+    compute nodes to the submit node running the HOD client, so users have good
+    feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund)
+
+    HADOOP-3505. Updated HOD documentation with changes made for Hadoop
+    0.18. (Vinod Kumar Vavilapalli via yhemanth)
+ 
+  BUG FIXES
+
+    HADOOP-2961. Avoids unnecessary checks for some configuration parameters
+    related to service configuration. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3523. Fixes auto-deallocation of cluster if job id is not found in
+    Torque's job list (Hemanth Yamijala via ddas)
+
+    HADOOP-3531. Fixes a bug related to handling JobTracker failures because of
+    timing issues on slow nodes. (Hemanth Yamijala via ddas)
+
+    HADOOP-3564. HOD generates values for the parameter dfs.datanode.ipc.address
+    in the hadoop-site.xml created on datanodes. 
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-3076. Fixes a bug related to a spurious message about the 
+    script.exitcode file when a cluster directory is specified as a relative
+    path. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3668. Makes editorial changes to HOD documentation.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3703. Fixes logcondense.py to use the new format of hadoop dfs -lsr
+    command line output format. (Vinod Kumar Vavilapalli via yhemanth)
+
+Release 0.17.3 - Unreleased 
+
+  BUG FIXES
+
+    HADOOP-3217. Decrease the rate at which the hod queries the resource
+    manager for job status. (Hemanth Yamijala via acmurthy) 
+
+Release 0.17.0 - 2008-05-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3137. Modified build script to pick up version automatically
+    from Hadoop build. (yhemanth)
+
+  IMPROVEMENTS
+
+    HADOOP-2775.  Adds unit test framework for HOD.
+    (Vinod Kumar Vavilapalli via ddas).
+
+    HADOOP-2848. [HOD]hod -o list and deallocate works even after deleting
+    the cluster directory. (Hemanth Yamijala via ddas)
+
+    HADOOP-2899. [HOD] Cleans up hdfs:///mapredsystem directory after
+    deallocation. (Hemanth Yamijala via ddas)
+
+    HADOOP-2796. Enables distinguishing exit codes from user code vis-a-vis
+    HOD's exit code. (Hemanth Yamijala via ddas)
+
+    HADOOP-2947. HOD redirects stdout and stderr of daemons to assist
+    getting stack traces. (Vinod Kumar Vavilapalli via yhemanth)
+
+  BUG FIXES
+
+    HADOOP-2924. Fixes an address problem to do with TaskTracker binding
+    to an address. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2970. Fixes a problem to do with Wrong class definition for
+    hodlib/Hod/hod.py for Python < 2.5.1.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2783. Fixes a problem to do with import in
+    hod/hodlib/Common/xmlrpc.py. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2936. Fixes HOD in a way that it generates hdfs://host:port on the
+    client side configs. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2983. [HOD] Fixes the problem - local_fqdn() returns None when
+    gethostbyname_ex doesn't return any FQDNs. (Craig Macdonald via ddas)
+
+    HADOOP-2982. Fixes a problem in the way HOD looks for free nodes.
+    (Hemanth Yamijala via ddas)
+
+    HADOOP-2855. Fixes the way HOD handles relative paths for cluster
+    directory, script file and other options.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3153. Fixes the way HOD handles allocation if the user has no
+    permissions to update the clusters state file.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+Release 0.16.4 - 2008-05-05
+
+  BUG FIXES
+
+    HADOOP-3304. [HOD] Fixes the way the logcondense.py utility searches
+    for log files that need to be deleted. (yhemanth via mukund)
+
+Release 0.16.2 - 2008-04-02
+
+  BUG FIXES
+
+    HADOOP-3103. [HOD] Hadoop.tmp.dir should not be set to cluster
+    directory. (Vinod Kumar Vavilapalli via ddas).
+
+Release 0.16.1 - 2008-03-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2861. Improve the user interface for the HOD commands.
+    Command line structure has changed. (Hemanth Yamijala via nigel)
+
+  IMPROVEMENTS
+
+    HADOOP-2730. HOD documentation update.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2911. Make the information printed by the HOD allocate and
+    info commands less verbose and clearer. (Vinod Kumar via nigel)
+
+  BUG FIXES
+
+    HADOOP-2766. Enables setting of HADOOP_OPTS env variable for the hadoop
+    daemons through HOD. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-2809.  Fix HOD syslog config syslog-address so that it works.
+    (Hemanth Yamijala via nigel)
+
+    HADOOP-2847.  Ensure idle cluster cleanup works even if the JobTracker
+    becomes unresponsive to RPC calls. (Hemanth Yamijala via nigel)
+
+    HADOOP-2925. Fix HOD to create the mapred system directory using a
+    naming convention that will avoid clashes in multi-user shared
+    cluster scenario. (Hemanth Yamijala via nigel)
+
+Release 0.16.0 - 2008-02-07
+
+  NEW FEATURES
+
+    HADOOP-1301.  Hadoop-On-Demand (HOD): resource management
+    provisioning for Hadoop. (Hemanth Yamijala via nigel)
+
+  BUG FIXES
+
+    HADOOP-2720. Jumbo bug fix patch to HOD.  Final sync of Apache SVN with
+    internal Yahoo SVN.  (Hemanth Yamijala via nigel)
+
+    HADOOP-2740. Fix HOD to work with the configuration variables changed in
+    HADOOP-2404. (Hemanth Yamijala via omalley)
+

+ 104 - 0
common/src/contrib/hod/README

@@ -0,0 +1,104 @@
+                        Hadoop On Demand
+                        ================
+
+1. Introduction:
+================
+
+The Hadoop On Demand (HOD) project is a system for provisioning and 
+managing independent Hadoop MapReduce instances on a shared cluster 
+of nodes. HOD uses a resource manager for allocation. At present it
+supports Torque (http://www.clusterresources.com/pages/products/torque-resource-manager.php)
+out of the box. 
+
+2. Feature List:
+================
+
+The following are the features provided by HOD:
+
+2.1 Simplified interface for managing MapReduce clusters:
+
+The MapReduce user interacts with the cluster through a simple 
+command line interface, the HOD client. HOD brings up a virtual 
+MapReduce cluster with the required number of nodes, which the 
+user can use for running Hadoop jobs. When done, HOD will 
+automatically clean up the resources and make the nodes available 
+again.
+
+2.2 Automatic installation of Hadoop:
+
+With HOD, Hadoop does not even need to be installed on the cluster.
+The user can provide a Hadoop tarball that HOD will automatically 
+distribute to all the nodes in the cluster.
+
+2.3 Configuring Hadoop:
+
+Dynamic parameters of Hadoop configuration, such as the NameNode and 
+JobTracker addresses and ports, and file system temporary directories
+are generated and distributed by HOD automatically to all nodes in
+the cluster.
+
+In addition, HOD allows the user to configure Hadoop parameters
+at both the server (e.g. the JobTracker) and client (e.g. the JobClient)
+levels, including the 'final' parameters that were introduced with 
+Hadoop 0.15.
+
+2.4 Auto-cleanup of unused clusters:
+
+HOD has an automatic timeout so that users cannot hold on to resources 
+they aren't using. The timeout applies only when there is no MapReduce 
+job running. 
+
+2.5 Log services:
+
+HOD can be used to collect all MapReduce logs to a central location
+for archiving and inspection after the job is completed.
+
+3. HOD Components
+=================
+
+This is a brief overview of the various components of HOD and how they
+interact to provision Hadoop.
+
+HOD Client: The HOD client is a Unix command that users use to allocate 
+Hadoop MapReduce clusters. The command provides other options to list 
+allocated clusters and deallocate them. The HOD client generates the 
+hadoop-site.xml in a user specified directory. The user can point to 
+this configuration file while running Map/Reduce jobs on the allocated 
+cluster.
+
+RingMaster: The RingMaster is a HOD process that is started on one node 
+per allocated cluster. It is submitted as a 'job' to the resource 
+manager by the HOD client. It controls which Hadoop daemons start on 
+which nodes. It provides this information to other HOD processes, 
+such as the HOD client, so users can also determine this information. 
+The RingMaster is responsible for hosting and distributing the 
+Hadoop tarball to all nodes in the cluster. It also automatically 
+cleans up unused clusters.
+
+HodRing: The HodRing is a HOD process that runs on every allocated node
+in the cluster. These processes are run by the RingMaster through the 
+resource manager, using its parallel execution facility. The HodRings
+are responsible for launching Hadoop commands on the nodes to bring up 
+the Hadoop daemons. They get the command to launch from the RingMaster.
+
+Hodrc / HOD configuration file: An INI style configuration file where
+the users configure various options for the HOD system, including
+install locations of different software, resource manager parameters,
+log and temp file directories, parameters for their MapReduce jobs,
+etc.
+
+Submit Nodes: Nodes where the HOD Client is run, from where jobs are
+submitted to the resource manager system for allocating and running 
+clusters.
+
+Compute Nodes: Nodes which get allocated by a resource manager, 
+and on which the Hadoop daemons are provisioned and started.
+
+4. Next Steps:
+==============
+
+- Read getting_started.txt to get an idea of how to get started with
+installing, configuring and running HOD.
+
+- Read config.txt to get more details on configuration options for HOD.
+

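The hodrc file described above is an INI-style file whose section names (hod, ringmaster, hodring, resource_manager, gridservice-mapred, gridservice-hdfs) and option names are defined in bin/hod below. The following is a minimal sketch and not a recommended configuration: the option names are taken from that definition list, every value is made up, and the standard-library ConfigParser stands in for HOD's own parser in hodlib.Common.setup.

# Sketch only: reads a hypothetical hodrc with the section names described in
# the README. All values below are illustrative.
try:
    from configparser import ConfigParser                       # Python 3
except ImportError:
    from ConfigParser import SafeConfigParser as ConfigParser   # Python 2

SAMPLE_HODRC = """
[hod]
java-home  = /usr/lib/jvm/java
clusterdir = /home/alice/hod-clusters/test

[resource_manager]
id         = torque
batch-home = /usr/local/torque
queue      = batch

[gridservice-hdfs]
external   = False

[gridservice-mapred]
external   = False
"""

def load_hodrc(path):
    parser = ConfigParser()
    parser.read(path)
    return parser

if __name__ == "__main__":
    with open("/tmp/hodrc.sample", "w") as handle:
        handle.write(SAMPLE_HODRC)
    conf = load_hodrc("/tmp/hodrc.sample")
    print(conf.get("resource_manager", "id"))   # -> torque
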
+ 1 - 0
common/src/contrib/hod/bin/VERSION

@@ -0,0 +1 @@
+0.4.0

+ 31 - 0
common/src/contrib/hod/bin/checknodes

@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+PBS_NODES_PATH=`which pbsnodes 2>/dev/null`
+if [ -z "$PBS_NODES_PATH" ]
+then
+  echo "Could not find pbsnodes in path. Cannot check available number of nodes." >&2
+  exit 1
+fi
+if [ -z "$1" ]
+then
+  echo "Usage: checknodes queue-name" >&2
+  exit 2
+fi
+# Count the nodes marked 'free' that do not have a jobs attribute from the server or from the moms.
+$PBS_NODES_PATH :$1 | awk 'BEGIN {c=0} /state = free/ {getline;getline;getline;getline; if ($0 !~ /jobs =/ && $0 !~ /jobs=[0-9].*/)  c++ ; } END {print c}'

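The awk one-liner above counts Torque nodes that are in the 'free' state and carry no jobs attribute. The Python sketch below applies roughly the same rule so the intent is easier to follow; the pbsnodes output layout assumed here (one attribute block per node, separated by blank lines) is an assumption and is not taken from the script itself.

# Sketch only: count nodes reported as free and without running jobs, roughly
# the rule the awk one-liner in checknodes applies. The sample pbsnodes output
# format is assumed, not authoritative.
def count_free_nodes(pbsnodes_output):
    free = 0
    for block in pbsnodes_output.strip().split("\n\n"):
        attrs = [line.strip() for line in block.splitlines()]
        is_free = any(a.startswith("state = ") and "free" in a for a in attrs)
        has_jobs = any(a.startswith("jobs =") or a.startswith("jobs=") for a in attrs)
        if is_free and not has_jobs:
            free += 1
    return free

SAMPLE = """node01
     state = free
     np = 8

node02
     state = job-exclusive
     jobs = 0/123.server
"""

print(count_free_nodes(SAMPLE))   # -> 1
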
+ 580 - 0
common/src/contrib/hod/bin/hod

@@ -0,0 +1,580 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+original_dir=$PWD
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir
+else
+    exec python -u -OO $base_name ${1+"$@"} --hod.original-dir $work_dir
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re, pwd, threading
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.Hod.hod import hodRunner
+from hodlib.Common.setup import *
+from hodlib.Common.descGenerator import *
+from hodlib.Common.util import local_fqdn, need_to_allocate, filter_warnings,\
+    get_exception_error_string, hodInterrupt, \
+    HOD_INTERRUPTED_MESG, HOD_INTERRUPTED_CODE,\
+    TORQUE_USER_LIMITS_COMMENT_FIELD
+from hodlib.Common.tcp import tcpError, tcpSocket
+from hodlib.Hod.hod import hodHelp
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+
+VERSION = None
+if os.path.exists("./VERSION"):
+  vFile = open("./VERSION", 'r')
+  VERSION = vFile.readline()
+  vFile.close()
+
+# Always look for hodrc file here unless otherwise specified with -c:   
+DEFAULT_LOC = os.path.join(rootDirectory, 'conf')
+DEFAULT_HOD_DIR = os.path.join(os.environ['HOME'], ".hod")
+
+if not os.path.isdir(DEFAULT_HOD_DIR):
+  os.mkdir(DEFAULT_HOD_DIR, 0777)
+
+DEFAULT_CONFIG = os.path.join(DEFAULT_HOD_DIR, 'hodrc')
+if not os.path.exists(DEFAULT_CONFIG):
+  if os.environ.has_key('HOD_CONF_DIR') and os.environ['HOD_CONF_DIR'] is not None:
+    DEFAULT_CONFIG = os.path.join(os.environ['HOD_CONF_DIR'], 'hodrc')
+
+# Definition tuple is of the form:
+#  (name, type, description, help?, default value, required?, validate?, 
+#   short option)
+#
+defList = { 'hod' : (      
+             ('original-dir', 'directory', 'hod original start directory',
+              False, None, True, True, 'r'),
+
+             ('clusterdir', 'directory', 
+             'Directory where cluster state information and hadoop-site.xml' +
+             ' will be stored.',
+              True, None, False, False, 'd'),
+
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True, 'y'),
+              
+             ('java-home', 'directory', 'Java home directory.',
+              True, None, True, True, 'j'),
+            
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              True, 3, True, True, 'b'),
+            
+             ('stream', 'bool', 'Output to stderr.',
+              False, True, False, True),
+
+             ('nodecount', 'pos_int', 
+              'Number of nodes to allocate at startup. ',
+              True, None, False, True, 'n'),
+
+             ('script', 'file', 'Hadoop script to execute.',
+              True, None, False, False, 's'), 
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True, 'u'),
+             
+             ('allocate-wait-time', 'pos_int', 
+              'Time to wait for cluster allocation.',
+              False, 300, True, True, 'e'),         
+              
+             ('operation', 'string',
+              'Initiate a hod operation. (help, allocate, deallocate ...)',
+              False, None, False, True, 'o'),
+             
+             ('cluster-factor', 'pos_float',
+              'The number of grid slots per machine', False, 1.9, False, True,
+              'x'),
+             
+             ('cluster', 'string', 'Name of cluster being used.',
+              False, None, True, True, 'w'),
+
+             ('proxy-xrs-address', 'address', 
+              'Address to Allocation Manager XML RPC proxy.',
+              False, None, False, True, 'p'),
+              
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+
+             ('client-params', 'keyval', 'Hadoop client xml key/value list',
+              True, None, False, True, 'C'), 
+
+             ('hadoop-ui-log-dir', 'directory', 'Directory to store Web UI Logs of Hadoop',
+              True, None, False, True),
+
+             ('temp-dir', 'directory', 'HOD temporary directories.',
+              False, None, True, False),
+
+             ('update-worker-info', 'bool', 'Specifies whether to update Worker Info after allocation',
+              False, False, False, True),
+
+             ('job-feasibility-attr', 'string', 'Specifies whether to check job feasibility - resource manager and/or scheduler limits, also gives the attribute value',
+              False, None, False, True),
+
+             ('title', 'string', 'Title for the current HOD allocation.',
+               True, "HOD", False, True, 'N'),
+
+             ('walltime', 'pos_int', 'Walltime in seconds for the current HOD allocation',
+              True, None, False, True, 'l'),
+
+             ('script-wait-time', 'pos_int', 'Specifies the time to wait before running the script. Used with the hod.script option.',
+              True, 10, False, True, 'W'),
+
+             ('log-rollover-count', 'pos_int', 'Specifies the number of rolled-over log files of HOD client. A zero value disables rollover.',
+              True, 5, False, True, 'L'),
+
+             ('job-status-query-interval', 'pos_int', 'Specifies the time between checking for job status', 
+              False, 30, False, True),
+
+             ('job-command-failure-interval', 'pos_int', 'Specifies the time between checking for failed job status or submission commands', 
+              False, 10, False, True),
+
+             ('job-status-query-failure-retries', 'pos_int', 'Specifies the number of times job status failure queries are retried', 
+              False, 3, False, True),
+
+             ('job-submission-failure-retries', 'pos_int', 'Specifies the number of times job submission failure queries are retried',
+              False, 3, False, True)),
+
+            'resource_manager' : (
+             ('id', 'string', 'Batch scheduler ID: torque|condor.',
+              False, None, True, True),
+             
+             ('pbs-user', 'user_account', 'User ID jobs are submitted under.',
+              False, None, False, True),
+              
+             ('pbs-account', 'string', 'User Account jobs are submitted under.',
+              True, None, False, False, 'A'),
+              
+             ('queue', 'string', 'Queue of the batch scheduler to query.',
+              True, 'batch', False, True, 'Q'),
+             
+             ('batch-home', 'directory', 'Scheduler installation directory.',
+              False, None, True, True),
+             
+             ('options', 'keyval', 'Options to pass to the scheduler.',
+              False, None, False, True),
+
+             ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.',
+              False, None, False, True)),
+                            
+            'ringmaster' : (
+             ('work-dirs', 'list', 'hod work directories',
+              False, None, True, False),
+
+             ('temp-dir', 'directory', 'Ringmaster temporary directory.',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, False),
+
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+              
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 4, True,   True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+              
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True),
+               
+             ('svcrgy-addr', 'address', 'Download HTTP address.',
+              False, None, False, False),             
+             
+             ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.',
+              True, None, False, False, 't'),
+
+             ('max-connect','pos_int','max connections allowed for a single tarball server',
+             False, 30, False, True),
+
+             ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness',
+             False, 120, False, True),
+
+             ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster',
+             False, 3600, False, True),
+
+             ('max-master-failures', 'pos_int', 
+              'Defines how many times a master can fail before' \
+              ' failing cluster allocation', False, 5, True, True),
+
+             ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring',
+             False, 1, False, True)),
+
+            'gridservice-mapred' : (
+             ('external', 'bool', "Connect to an already running MapRed?",
+              False, False, True, True),
+              
+             ('host', 'hostname', 'Mapred hostname.', 
+              False, 'localhost', False, False),
+
+             ('info_port', 'pos_int', 'Mapred info port.',
+              False, None, False, False),
+             
+             ('tracker_port', 'pos_int', 'Mapred job tracker port.',
+              False, None, False, False),
+                        
+             ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.',
+              False, None, False, False),
+
+             ('server-params', 'keyval', 'Hadoop xml key/value list',
+              True, None, False, True, 'M'),
+               
+             ('envs', 'keyval', 'environment to run this package in',
+              False, None, False, True),
+
+             ('final-server-params', 'keyval', 'Hadoop final xml key/val list',
+              False, None, False, True, 'F'),
+
+             ('pkgs', 'directory', "directory where the package is installed",
+              False, None, False, False)), 
+               
+               
+            'gridservice-hdfs' : (
+             ('external', 'bool', "Connect to an already running HDFS?",
+              False, False, True, True),
+             
+             ('host', 'hostname', 'HDFS hostname.', 
+              False, 'localhost', False, False),
+             
+             ('fs_port', 'pos_int', 'HDFS port.',
+              False, None, False, False),
+              
+             ('info_port', 'pos_int', 'HDFS info port.',
+              False, None, False, False), 
+             
+             ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.',
+              False, None, False, False),
+
+             ('server-params', 'keyval', 'Hadoop xml key/value list',
+              False, None, False, True, 'H'),
+
+             ('final-server-params', 'keyval', 'Hadoop final xml key/value list',
+              False, None, False, True, 'S'),
+           
+             ('envs', 'keyval', 'Environment in which to run this package.',
+              False, None, False, True),
+
+             ('pkgs', 'directory', "directory where the package is installed",
+              False, None, False, False)),           
+             
+             
+            'hodring' : (
+             ('temp-dir', 'list', 'hodring temporary directory.',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, False), 
+              
+             ('log-destination-uri', 'string', 
+              'URI to store logs to, local://some_path or '
+              + 'hdfs://host:port/some_path', 
+              False, None, False, True),
+
+             ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS',
+              False, None, False, False),
+              
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+          
+             ('java-home', 'directory', 'Java home directory.',
+              False, None, True, False),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 3, True, True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, pwd.getpwuid(os.getuid())[0], False, True),
+               
+             ('command', 'string', 'Command for hodring to run.',
+              False, None, False, True),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+               
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+              
+             ('hadoop-port-range', 'range', 'Hadoop port range n-m.',
+              False, None, True, True),  
+            
+             ('service-id', 'string', 'Service ID.',
+              False, None, False, True),
+              
+             ('download-addr', 'string', 'Download HTTP address.',
+              False, None, False, True),
+               
+             ('svcrgy-addr', 'address', 'Download HTTP address.',
+              False, None, False, True), 
+    
+             ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.',
+              False, None, False, True),
+
+             ('tarball-retry-initial-time', 'pos_float','Initial Retry time for tarball download',
+              False, 1, False, True),
+              
+             ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download',
+              False, 3, False, True),
+              
+             ('cmd-retry-initial-time', 'pos_float','Initial retry time for getting commands',
+              False, 2, False, True),
+             
+             ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands',
+              False, 2, False, True),
+
+             ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.',
+              False, '/mapredsystem', False, False))
+              }   
+
+defOrder = [ 'hod', 'ringmaster', 'hodring', 'resource_manager', 
+             'gridservice-mapred', 'gridservice-hdfs' ]
+
+def printErrors(msgs):
+  for msg in msgs:
+    print msg
+
+def op_requires_pkgs(config):
+  if config['hod'].has_key('operation'):
+    return config['hod']['operation'].startswith('allocate')
+  else:
+    return config['hod'].has_key('script')
+
+if __name__ == '__main__':  
+  try:
+    confDef = definition()
+    confDef.add_defs(defList, defOrder)
+    hodhelp = hodHelp()
+    usage = hodhelp.help()
+            
+    hodOptions = options(confDef, usage,
+                      VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG,
+                      name=myName )
+    # hodConfig is a dict like object, hodConfig[section][name]
+    try:
+      hodConfig = config(hodOptions['config'], configDef=confDef, 
+                       originalDir=hodOptions['hod']['original-dir'],
+                       options=hodOptions) 
+    except IOError, e:
+      print >>sys.stderr,"error: %s not found. Specify the path to the HOD configuration file, or define the environment variable %s under which a file named hodrc can be found." % (hodOptions['config'], 'HOD_CONF_DIR')
+      sys.exit(1)
+  
+    # Conditional validation
+    statusMsgs = []
+
+    if hodConfig.normalizeValue('gridservice-hdfs', 'external'):
+      # For external HDFS
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'fs_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'info_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs',
+                                                'host'))
+    else:
+      hodConfig['gridservice-hdfs']['fs_port'] = 0 # Dummy
+      hodConfig['gridservice-hdfs']['info_port'] = 0 # Not used at all
+
+    if hodConfig.normalizeValue('gridservice-mapred', 'external'):
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'tracker_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'info_port'))
+      statusMsgs.extend(hodConfig.validateValue('gridservice-mapred',
+                                                'host'))
+    else:
+      hodConfig['gridservice-mapred']['tracker_port'] = 0 # Dummy
+      hodConfig['gridservice-mapred']['info_port'] = 0 # Not used at all
+
+    if len(statusMsgs) != 0:
+      for msg in statusMsgs:
+        print >>sys.stderr, msg
+      sys.exit(1)
+    # End of conditional validation
+
+    status = True
+    statusMsgs = []
+  
+    (status,statusMsgs) = hodConfig.verify()
+    if not status:
+      print >>sys.stderr,"error: bin/hod failed to start."
+      for msg in statusMsgs:
+        print >>sys.stderr,"%s" % (msg)
+      sys.exit(1)
+  
+    ## TODO : should move the dependency verification to hodConfig.verify
+    if hodConfig['hod'].has_key('operation') and \
+      hodConfig['hod'].has_key('script'):
+      print "Script operation is mutually exclusive with other HOD operations"
+      hodOptions.print_help(sys.stderr)
+      sys.exit(1)
+    
+    if 'operation' not in hodConfig['hod'] and 'script' not in hodConfig['hod']:
+      print "HOD requires at least a script or operation be specified."
+      hodOptions.print_help(sys.stderr)
+      sys.exit(1)    
+    
+    if hodConfig['gridservice-hdfs']['external']:
+      hdfsAddress = "%s:%s" % (hodConfig['gridservice-hdfs']['host'], 
+                               hodConfig['gridservice-hdfs']['fs_port'])
+  
+      hdfsSocket = tcpSocket(hdfsAddress)
+        
+      try:
+        hdfsSocket.open()
+        hdfsSocket.close()
+      except tcpError:
+        printErrors(hodConfig.var_error('hod', 'gridservice-hdfs', 
+          "Failed to open a connection to external hdfs address: %s." % 
+          hdfsAddress))
+        sys.exit(1)
+    else:
+      hodConfig['gridservice-hdfs']['host'] = 'localhost'
+  
+    if hodConfig['gridservice-mapred']['external']:
+      mapredAddress = "%s:%s" % (hodConfig['gridservice-mapred']['host'], 
+                                 hodConfig['gridservice-mapred']['tracker_port'])
+  
+      mapredSocket = tcpSocket(mapredAddress)
+        
+      try:
+        mapredSocket.open()
+        mapredSocket.close()
+      except tcpError:
+        printErrors(hodConfig.var_error('hod', 'gridservice-mapred', 
+          "Failed to open a connection to external mapred address: %s." % 
+          mapredAddress))
+        sys.exit(1)
+    else:
+      hodConfig['gridservice-mapred']['host'] = 'localhost'
+  
+    if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \
+      not hodConfig['gridservice-hdfs'].has_key('pkgs') and \
+      op_requires_pkgs(hodConfig):
+      printErrors(hodConfig.var_error('gridservice-hdfs', 'pkgs', 
+        "gridservice-hdfs.pkgs must be defined if ringmaster.hadoop-tar-ball "
+        + "is not defined."))
+      sys.exit(1)
+  
+    if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \
+      not hodConfig['gridservice-mapred'].has_key('pkgs') and \
+      op_requires_pkgs(hodConfig):
+      printErrors(hodConfig.var_error('gridservice-mapred', 'pkgs', 
+        "gridservice-mapred.pkgs must be defined if ringmaster.hadoop-tar-ball "
+        + "is not defined."))
+      sys.exit(1)
+  
+    if hodConfig['hodring'].has_key('log-destination-uri'):
+      if hodConfig['hodring']['log-destination-uri'].startswith('file://'):
+        pass
+      elif hodConfig['hodring']['log-destination-uri'].startswith('hdfs://'):
+        hostPort = hodConfig['hodring']['log-destination-uri'][7:].split("/")
+        hostPort = hostPort[0]
+        socket = tcpSocket(hostPort)
+        try:
+          socket.open()
+          socket.close()
+        except:
+          printErrors(hodConfig.var_error('hodring', 'log-destination-uri', 
+          "Unable to contact host/port specified in log destination uri: %s" % 
+          hodConfig['hodring']['log-destination-uri']))
+          sys.exit(1)
+      else:
+        printErrors(hodConfig.var_error('hodring', 'log-destination-uri', 
+          "The log destiniation uri must be of type local:// or hdfs://."))
+        sys.exit(1)
+  
+    if hodConfig['ringmaster']['workers_per_ring'] < 1:
+      printErrors(hodConfig.var_error('ringmaster', 'workers_per_ring',
+                "ringmaster.workers_per_ring must be a positive integer " +
+                "greater than or equal to 1"))
+      sys.exit(1)
+                        
+    ## TODO : end of the dependency verification that should move to hodConfig.verify
+      
+    hodConfig['hod']['base-dir'] = rootDirectory
+    hodConfig['hod']['user_state'] = DEFAULT_HOD_DIR
+  
+    dGen = DescGenerator(hodConfig)
+    hodConfig = dGen.initializeDesc()
+    
+    os.environ['JAVA_HOME'] = hodConfig['hod']['java-home']
+    
+    if hodConfig['hod']['debug'] == 4:
+      print ""
+      print "Using Python: %s" % sys.version
+      print ""
+   
+    hod = hodRunner(hodConfig)
+  
+    # Initiate signal handling
+    hodInterrupt.set_log(hod.get_logger())
+    hodInterrupt.init_signals()
+    # Interrupts set up. Now on we handle signals only when we wish to.
+  except KeyboardInterrupt:
+    print HOD_INTERRUPTED_MESG
+    sys.exit(HOD_INTERRUPTED_CODE)
+  
+  opCode = 0
+  try:
+    if hodConfig['hod'].has_key('script'):
+      opCode = hod.script()
+    else:  
+      opCode = hod.operation()
+  except Exception, e:
+    print "Uncaught Exception : %s" % e
+  finally:
+    sys.exit(opCode)

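When gridservice-hdfs or gridservice-mapred is marked external, bin/hod verifies that the configured host:port is reachable by opening and immediately closing a TCP connection through hodlib.Common.tcp.tcpSocket. The sketch below reproduces that probe with the standard socket module instead of the hodlib wrapper (whose implementation is not part of this excerpt); the address used is purely illustrative.

# Sketch only: a reachability probe in the spirit of the tcpSocket open/close
# check in bin/hod, written against the standard socket module rather than
# hodlib.Common.tcp. The host and port are illustrative.
import socket

def is_reachable(address, timeout=5.0):
    """Return True if a TCP connection to 'host:port' can be opened."""
    host, port = address.rsplit(":", 1)
    try:
        probe = socket.create_connection((host, int(port)), timeout)
        probe.close()
        return True
    except (socket.error, ValueError):
        return False

if __name__ == "__main__":
    # Mirrors the external gridservice-hdfs check: fail fast when the
    # configured fs_port cannot be reached.
    hdfs_address = "namenode.example.com:8020"   # illustrative
    if not is_reachable(hdfs_address):
        print("Failed to open a connection to external hdfs address: %s" % hdfs_address)
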
+ 183 - 0
common/src/contrib/hod/bin/hodcleanup

@@ -0,0 +1,183 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+original_dir=$PWD
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"}
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -u -OO $base_name ${1+"$@"}
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -u -OO $base_name ${1+"$@"}
+else
+    exec python -u -OO $base_name ${1+"$@"}
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re, pwd, threading, random, time, pprint, shutil
+from pprint import pformat
+from optparse import OptionParser
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.Common.threads import simpleCommand
+from hodlib.Common.util import local_fqdn, tar, filter_warnings,\
+                            get_exception_string, get_exception_error_string
+from hodlib.Common.logger import hodLog
+from hodlib.Common.logger import getLogger
+from hodlib.HodRing.hodRing import createMRSystemDirectoryManager
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+reHdfsURI = re.compile("(hdfs://.*?:\d+)(.*)")
+
+VERSION = None
+if os.path.exists("./VERSION"):
+  vFile = open("./VERSION", 'r')
+  VERSION = vFile.readline()
+  vFile.close()
+
+def __archive_logs(conf, log):
+  # need log-destination-uri, __hadoopLogDirs, temp-dir
+  status = True
+  logUri = conf['log-destination-uri']
+  hadoopLogDirs = conf['hadoop-log-dirs']
+  if logUri:
+    try:
+      if hadoopLogDirs:
+        date = time.localtime()
+        for logDir in hadoopLogDirs:
+          (head, tail) = os.path.split(logDir)
+          (head, logType) = os.path.split(head)
+          tarBallFile = "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % (
+            logType, local_fqdn(), date[0], date[1], date[2], date[3], 
+            date[4], date[5], random.randint(0,1000))
+          
+          if logUri.startswith('file://'):
+            tarBallFile = os.path.join(logUri[7:], 
+                                       tarBallFile)
+          else:
+            tarBallFile = os.path.join(conf['temp-dir'], tarBallFile)
+          
+          log.debug('archiving log files to: %s' % tarBallFile)
+          status = tar(tarBallFile, logDir, ['*',])
+          log.info('archive %s status: %s' % (tarBallFile, status))
+          if status and \
+            logUri.startswith('hdfs://'):
+            __copy_archive_to_dfs(conf, tarBallFile)
+            log.info("copying archive to dfs finished")
+        dict = {} 
+    except:
+      log.error(get_exception_string())
+      status = False
+  return status
+
+
+def __copy_archive_to_dfs(conf, archiveFile):
+  # need log-destination-uri, hadoopCommandstring and/or pkgs
+  hdfsURIMatch = reHdfsURI.match(conf['log-destination-uri'])
+  
+  (head, tail) = os.path.split(archiveFile)
+  destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], 'hod-logs', conf['service-id'], tail)
+  
+  log.info("copying archive %s to DFS %s ..." % (archiveFile, destFile))
+  
+  hadoopCmd = conf['hadoop-command-string']
+  if conf['pkgs']:
+    hadoopCmd = os.path.join(conf['pkgs'], 'bin', 'hadoop')
+
+  copyCommand = "%s dfs -fs %s -copyFromLocal %s %s" % (hadoopCmd, 
+    hdfsURIMatch.group(1), archiveFile, destFile)
+  
+  log.debug(copyCommand)
+  
+  copyThread = simpleCommand('hadoop', copyCommand)
+  copyThread.start()
+  copyThread.wait()
+  copyThread.join()
+  log.debug(pprint.pformat(copyThread.output()))
+  
+  os.unlink(archiveFile)
+
+def unpack():
+  parser = OptionParser()
+  option_list=["--log-destination-uri", "--hadoop-log-dirs", \
+          "--temp-dir", "--hadoop-command-string", "--pkgs", "--user-id", \
+          "--service-id", "--hodring-debug", "--hodring-log-dir", \
+          "--hodring-syslog-address", "--hodring-cleanup-list", \
+          "--jt-pid", "--mr-sys-dir", "--fs-name", "--hadoop-path"]
+  regexp = re.compile("^--")
+  for opt in option_list:
+    parser.add_option(opt,dest=regexp.sub("",opt),action="store")
+  option_list.append("--hodring-stream")
+  parser.add_option("--hodring-stream",dest="hodring-stream",metavar="bool",\
+                                                        action="store_true")
+  (options, args) = parser.parse_args()
+  _options= {}
+  _options['hodring'] = {}
+  for opt in dir(options):
+    if "--"+opt in option_list:
+      _options[opt] = getattr(options,opt)
+  if _options.has_key('hadoop-log-dirs') and _options['hadoop-log-dirs']:
+    _options['hadoop-log-dirs'] = _options['hadoop-log-dirs'].split(",")
+  if _options.has_key('hodring-syslog-address') and _options['hodring-syslog-address']:
+    _options['hodring']['syslog-address'] = \
+        _options['hodring-syslog-address'].split(':')
+  _options['hodring']['debug']        = int(_options['hodring-debug'])
+  _options['hodring']['log-dir']      = _options['hodring-log-dir']
+  _options['hodring']['stream']      = _options['hodring-stream']
+  _options['hodring']['userid']      = _options['user-id']
+  os.putenv('PBS_JOBID', _options['service-id'] )
+  return _options
+ 
+if __name__ == '__main__':  
+  log = None
+  try:
+    conf = unpack()
+    # Use the same log as hodring
+    log = getLogger(conf['hodring'],'hodring')
+    log.debug("Logger initialised successfully")
+    mrSysDirManager = createMRSystemDirectoryManager(conf, log)
+    if mrSysDirManager is not None:
+      mrSysDirManager.removeMRSystemDirectory()
+
+    status =  __archive_logs(conf,log)
+    log.info("Archive status : %s" % status)
+    list = conf['hodring-cleanup-list'].split(',')
+    log.info("now removing %s" % list)
+    for dir in list:
+     if os.path.exists(dir):
+       log.debug('removing %s' % (dir))
+       shutil.rmtree(dir, True)
+       log.debug("done")
+    log.info("Cleanup successfully completed")
+  except Exception, e:
+    if log:
+      log.info("Stack trace:\n%s\n%s" %(get_exception_error_string(),get_exception_string()))

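__archive_logs above names each tarball as <logType>-<fqdn>-<YYYYMMDDhhmmss>-<random>.tar.gz, tars the log directory, and, when the destination is an hdfs:// URI, copies the archive with 'hadoop dfs -fs ... -copyFromLocal'. Below is a small sketch of just the filename construction, using socket.getfqdn() as a stand-in for hodlib's local_fqdn() helper.

# Sketch only: builds the archive name the same way __archive_logs does,
# "<logType>-<fqdn>-<YYYYMMDDhhmmss>-<random>.tar.gz". socket.getfqdn()
# stands in for hodlib's local_fqdn().
import random
import socket
import time

def archive_name(log_type):
    date = time.localtime()
    return "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % (
        log_type, socket.getfqdn(), date[0], date[1], date[2],
        date[3], date[4], date[5], random.randint(0, 1000))

print(archive_name("userlogs"))
# e.g. userlogs-node17.example.com-20110611143005-482.tar.gz
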
+ 290 - 0
common/src/contrib/hod/bin/hodring

@@ -0,0 +1,290 @@
+#!/bin/sh
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+""":"
+work_dir=$(dirname $0)
+base_name=$(basename $0)
+cd $work_dir
+
+if [ $HOD_PYTHON_HOME ]; then
+    exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"}
+elif [ -e /usr/bin/python ]; then
+    exec /usr/bin/python -OO $base_name ${1+"$@"}
+elif [ -e /usr/local/bin/python ]; then
+    exec /usr/local/bin/python -OO $base_name ${1+"$@"}
+else
+    exec python -OO $base_name ${1+"$@"}
+fi
+":"""
+
+"""The executable to be used by the user"""
+import sys, os, re
+
+
+myName          = os.path.basename(sys.argv[0])
+myName          = re.sub(".*/", "", myName)
+binDirectory    = os.path.realpath(sys.argv[0])
+rootDirectory   = re.sub("/bin/.*", "", binDirectory)
+libDirectory    = rootDirectory
+
+sys.path.append(libDirectory)
+
+from hodlib.HodRing.hodRing import HodRing
+from hodlib.Common.setup import *
+from hodlib.Common.util import filter_warnings, get_exception_string, \
+                get_exception_error_string, getMapredSystemDirectory, \
+                to_http_url, local_fqdn
+from hodlib.Common.logger import getLogger, ensureLogDir
+from hodlib.Common.xmlrpc import hodXRClient
+
+filter_warnings()
+
+reVersion = re.compile(".*(\d+_\d+).*")
+
+VERSION = '$HeadURL$'
+
+reMatch = reVersion.match(VERSION)
+if reMatch:
+    VERSION = reMatch.group(1)
+    VERSION = re.sub("_", ".", VERSION)
+else:
+    VERSION = 'DEV'
+
+# Definition tuple is of the form:
+#  (name, type, description, help?, default value, required?, validate?)
+#
+defList = { 'hodring' : (
+             ('temp-dir', 'directory', 'hod work directories',
+              False, None, True, False),
+              
+             ('log-dir', 'directory', 'hod logging directory.', 
+              False, os.path.join(rootDirectory, 'logs'), False, True), 
+
+             ('log-destination-uri', 'string', 
+              'URI to store logs to, local://some_path or '
+              + 'hdfs://host:port/some_path', 
+              False, None, False, True), 
+
+             ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS',
+              False, None, False, True),
+              
+             ('syslog-address', 'address', 'Syslog address.',
+              False, None, False, True),
+          
+             ('java-home', 'directory', 'Java home directory.',
+              False, None, True, True),
+              
+             ('debug', 'pos_int', 'Debugging level, 0-4.',
+              False, 3, True, True),
+               
+             ('register', 'bool', 'Register with service registry?',
+              False, True, True, True),
+               
+             ('stream', 'bool', 'Output to stderr.',
+              False, False, False, True),
+
+             ('userid', 'user_account', 
+              'User ID the hod shell is running under.',
+              False, None, True, False),
+
+             ('xrs-port-range', 'range', 'XML-RPC port range n-m.',
+              False, None, True, True),
+               
+             ('http-port-range', 'range', 'HTTP port range n-m.',
+              False, None, True, True),
+      
+             ('hadoop-port-range', 'range', 'Hadoop port range n-m.',
+              False, None, True, True),
+               
+             ('command', 'string', 'Command for hodring to run.',
+              False, None, False, True),
+              
+             ('service-id', 'string', 'Service ID.',
+              False, None, False, True),
+              
+             ('download-addr', 'string', 'Download HTTP address.',
+              False, None, False, True),
+               
+             ('svcrgy-addr', 'address', 'Service registry XMLRPC address.',
+              False, None, True, True), 
+    
+             ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.',
+              False, None, False, True),
+ 
+             ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download',
+              False, 1, False, True),
+              
+             ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download',
+              False, 3, False, True),
+              
+             ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands',
+              False, 2, False, True),
+             
+             ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands',
+              False, 2, False, True), 
+
+             ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.',
+              False, '/mapredsystem', False, False))
+            }            
+
+if __name__ == '__main__':
+
+  confDef = definition()
+  confDef.add_defs(defList)
+  hodRingOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION)
+  ensureLogDir(hodRingOptions['hodring']['log-dir'])
+  service = None
+  try:
+    (status, statusMsgs) = hodRingOptions.verify()
+    if not status:
+      raise Exception("%s" % statusMsgs)
+    hodRingOptions['hodring']['base-dir'] = rootDirectory
+    service = HodRing(hodRingOptions)
+    service.start()
+    service.wait()
+   
+    if service.log:
+      log = service.log
+    else: 
+      log = getLogger(hodRingOptions['hodring'],'hodring')
+
+    list = []
+    
+    runningHadoops = service.getRunningValues()
+
+    mrSysDirManager = None      
+    for cmd in runningHadoops:
+      if cmd.name == 'jobtracker':
+        mrSysDirManager = cmd.getMRSystemDirectoryManager()
+      log.debug("addding %s to cleanup list..." % cmd)
+      cmd.addCleanup(list)
+    
+    list.append(service.getTempDir())
+    log.debug(list)
+       
+    # archive_logs now
+    cmdString = os.path.join(rootDirectory, "bin", "hodcleanup") # same python
+
+    if (len(runningHadoops) == 0):
+      log.info("len(runningHadoops) == 0, No running cluster?")
+      log.info("Skipping __copy_archive_to_dfs")
+      hadoopString = ""
+    else: hadoopString=runningHadoops[0].path
+
+    #construct the arguments
+    if hodRingOptions['hodring'].has_key('log-destination-uri'):
+      cmdString = cmdString + " --log-destination-uri " \
+                    + hodRingOptions['hodring']['log-destination-uri']
+
+    hadoopLogDirs = service.getHadoopLogDirs()
+    if hadoopLogDirs:
+      cmdString = cmdString \
+                    + " --hadoop-log-dirs " \
+                    + ",".join(hadoopLogDirs)
+
+    cmdString = cmdString \
+                  + " --temp-dir " \
+                  + service._cfg['temp-dir'] \
+                  + " --hadoop-command-string " \
+                  + hadoopString \
+                  + " --user-id " \
+                  + service._cfg['userid'] \
+                  + " --service-id " \
+                  + service._cfg['service-id'] \
+                  + " --hodring-debug " \
+                  + str(hodRingOptions['hodring']['debug']) \
+                  + " --hodring-log-dir " \
+                  + hodRingOptions['hodring']['log-dir'] \
+                  + " --hodring-cleanup-list " \
+                  + ",".join(list)
+
+    if hodRingOptions['hodring'].has_key('syslog-address'):
+      syslogAddr = hodRingOptions['hodring']['syslog-address'][0] + \
+                   ':' + str(hodRingOptions['hodring']['syslog-address'][1])
+      cmdString = cmdString + " --hodring-syslog-address " + syslogAddr
+    if service._cfg.has_key('pkgs'):
+      cmdString = cmdString + " --pkgs " + service._cfg['pkgs']
+
+    if mrSysDirManager is not None:
+      cmdString = "%s %s" % (cmdString, mrSysDirManager.toCleanupArgs())
+
+    log.info("cleanup commandstring : ")
+    log.info(cmdString)
+
+    # clean up
+    cmd = ['/bin/sh', '-c', cmdString]
+
+    mswindows = (sys.platform == "win32")
+    originalcwd = os.getcwd()
+
+    if not mswindows:
+      try: 
+        pid = os.fork() 
+        if pid > 0:
+          # exit first parent
+          log.info("child(pid: %s) is now doing cleanup" % pid)
+          sys.exit(0) 
+      except OSError, e: 
+        log.error("fork failed: %d (%s)" % (e.errno, e.strerror)) 
+        sys.exit(1)
+
+      # decouple from parent environment
+      os.chdir("/") 
+      os.setsid() 
+      os.umask(0) 
+ 
+    MAXFD = 128 # more than enough file descriptors to close. Just in case.
+    for i in xrange(0, MAXFD):
+      try:
+        os.close(i)
+      except OSError:
+        pass
+  
+    try:
+      os.execvp(cmd[0], cmd)
+    finally:
+      log.critical("exec failed")
+      os._exit(1)
+
+  except Exception, e:
+    if service:
+      if service.log:
+        log = service.log
+    else:
+      log = getLogger(hodRingOptions['hodring'], 'hodring')
+    log.error("Error in bin/hodring %s. \nStack trace:\n%s" %(get_exception_error_string(),get_exception_string()))
+    
+    log.info("now trying informing to ringmaster")
+    log.info(hodRingOptions['hodring']['ringmaster-xrs-addr'])
+    log.info(hodRingOptions.normalizeValue('hodring', 'ringmaster-xrs-addr'))
+    log.info(to_http_url(hodRingOptions.normalizeValue( \
+            'hodring', 'ringmaster-xrs-addr')))
+    # Report errors to the Ringmaster if possible
+    try:
+      ringXRAddress = to_http_url(hodRingOptions.normalizeValue( \
+                                     'hodring', 'ringmaster-xrs-addr'))
+      log.debug("Creating ringmaster XML-RPC client.")
+      ringClient = hodXRClient(ringXRAddress)    
+      if ringClient is not None:
+        addr = local_fqdn() + "_" + str(os.getpid())
+        ringClient.setHodRingErrors(addr, str(e))
+        log.info("Reported errors to ringmaster at %s" % ringXRAddress)
+    except Exception, e:
+      log.error("Failed to report errors to ringmaster at %s" % ringXRAddress)
+      log.error("Reason : %s" % get_exception_string())
+    # End of reporting errors to the client

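At the end of its run, bin/hodring detaches a child process to perform the cleanup: it forks and lets the parent exit, calls setsid(), resets the umask, closes inherited file descriptors, and finally execs the hodcleanup command assembled above. The sketch below condenses that detach-and-exec sequence; the command passed in is a harmless placeholder, not the real hodcleanup invocation.

# Sketch only: the detach-and-exec pattern used at the end of bin/hodring,
# reduced to its essentials. The command is a placeholder; hodring execs the
# hodcleanup command string it assembled beforehand.
import os
import sys

def detach_and_exec(argv):
    if os.fork() > 0:
        sys.exit(0)            # parent returns immediately, child carries on
    os.chdir("/")              # decouple from the parent environment
    os.setsid()
    os.umask(0)
    for fd in range(0, 128):   # close inherited descriptors, as hodring does
        try:
            os.close(fd)
        except OSError:
            pass
    os.execvp(argv[0], argv)   # never returns on success

if __name__ == "__main__":
    detach_and_exec(["/bin/sh", "-c", "echo cleanup >> /tmp/hodring-cleanup.log"])
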
Too many files were changed in this changeset, so some files are not shown.