
Merging in Yahoo security patchset to Apache Hadoop 0.20.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20-security@1062108 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy, 14 years ago
Parent
Current commit
620f84c82f
100 files changed, with 13,209 insertions and 2,898 deletions
  1. +13 -6  .eclipse.templates/.classpath
  2. +8 -0  .gitignore
  3. +31 -0  BUILDME.txt
  4. +1623 -0  CHANGES.txt
  5. +67 -10  bin/hadoop
  6. +12 -4  bin/hadoop-daemon.sh
  7. +1 -1  bin/rcc
  8. +27 -0  bin/start-jobhistoryserver.sh
  9. +27 -0  bin/stop-jobhistoryserver.sh
  10. +684 -158  build.xml
  11. +89 -9  conf/capacity-scheduler.xml.template
  12. +0 -40  conf/hadoop-metrics.properties
  13. +16 -0  conf/hadoop-metrics2.properties.example
  14. +9 -0  conf/hadoop-policy.xml.template
  15. +38 -1  conf/log4j.properties
  16. +49 -0  conf/mapred-queue-acls.xml
  17. +49 -0  conf/mapred-queue-acls.xml.template
  18. +4 -0  conf/taskcontroller.cfg
  19. +42 -7  ivy.xml
  20. +127 -0  ivy/hadoop-core-pom-template.xml
  21. +7 -1  ivy/hadoop-core.pom
  22. +34 -0  ivy/hadoop-examples-pom-template.xml
  23. +34 -0  ivy/hadoop-streaming-pom-template.xml
  24. +53 -0  ivy/hadoop-test-pom-template.xml
  25. +34 -0  ivy/hadoop-tools-pom-template.xml
  26. +9 -0  ivy/ivysettings.xml
  27. +18 -7  ivy/libraries.properties
  28. +11 -0  lib/jdiff/hadoop_0.20.1.xml
  29. +11 -0  lib/jdiff/hadoop_0.20.100.xml
  30. +0 -0  src/c++/libhdfs/autom4te.cache/output.0t
  31. +0 -0  src/c++/libhdfs/autom4te.cache/requests
  32. +8 -0  src/c++/libhdfs/autom4te.cache/traces.0t
  33. +67 -116  src/c++/libhdfs/hdfs.c
  34. +1 -2  src/c++/libhdfs/hdfs.h
  35. +1 -0  src/c++/libhdfs/hdfsJniHelper.c
  36. +1 -4  src/c++/libhdfs/hdfs_test.c
  37. +10 -12  src/c++/pipes/Makefile.in
  38. +306 -162  src/c++/pipes/aclocal.m4
  39. +416 -193  src/c++/pipes/configure
  40. +106 -3  src/c++/pipes/impl/HadoopPipes.cc
  41. +18 -20  src/c++/task-controller/.autom4te.cfg
  42. +13 -0  src/c++/task-controller/.gitignore
  43. +32 -0  src/c++/task-controller/Makefile.am
  44. +55 -0  src/c++/task-controller/configure.ac
  45. +297 -0  src/c++/task-controller/impl/configuration.c
  46. +42 -0  src/c++/task-controller/impl/configuration.h
  47. +196 -0  src/c++/task-controller/impl/main.c
  48. +1062 -0  src/c++/task-controller/impl/task-controller.c
  49. +154 -0  src/c++/task-controller/impl/task-controller.h
  50. +763 -0  src/c++/task-controller/test/test-task-controller.c
  51. +14 -10  src/c++/utils/Makefile.in
  52. +198 -312  src/c++/utils/aclocal.m4
  53. +176 -315  src/c++/utils/configure
  54. +2 -0  src/c++/utils/m4/hadoop_utils.m4
  55. +127 -6  src/contrib/build-contrib.xml
  56. +30 -0  src/contrib/build.xml
  57. +21 -5  src/contrib/capacity-scheduler/ivy.xml
  58. +4 -0  src/contrib/capacity-scheduler/ivy/libraries.properties
  59. +234 -40  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerConf.java
  60. +1340 -0  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerQueue.java
  61. +154 -0  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerServlet.java
  62. +380 -402  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacityTaskScheduler.java
  63. +83 -105  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/JobInitializationPoller.java
  64. +55 -156  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/JobQueuesManager.java
  65. +68 -75  src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/MemoryMatcher.java
  66. +8 -1  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/ClusterWithCapacityScheduler.java
  67. +523 -139  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacityScheduler.java
  68. +20 -0  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerConf.java
  69. +72 -0  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerServlet.java
  70. +129 -0  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerWithJobTracker.java
  71. +0 -57  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestJobInitialization.java
  72. +85 -0  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithCS.java
  73. +0 -440  src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestQueueCapacities.java
  74. +2 -3  src/contrib/data_join/build.xml
  75. +8 -0  src/contrib/data_join/ivy.xml
  76. +2 -0  src/contrib/data_join/ivy/libraries.properties
  77. +2 -2  src/contrib/eclipse-plugin/build.xml
  78. +2 -2  src/contrib/failmon/build.xml
  79. +16 -4  src/contrib/fairscheduler/ivy.xml
  80. +2 -2  src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/CapBasedLoadManager.java
  81. +11 -16  src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
  82. +37 -31  src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java
  83. +8 -1  src/contrib/fuse-dfs/ivy.xml
  84. +2 -0  src/contrib/fuse-dfs/ivy/libraries.properties
  85. +22 -0  src/contrib/gridmix/README
  86. +15 -18  src/contrib/gridmix/build.xml
  87. +101 -0  src/contrib/gridmix/ivy.xml
  88. +24 -0  src/contrib/gridmix/ivy/libraries.properties
  89. +91 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/AvgRecordFactory.java
  90. +196 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/CombineFileSplit.java
  91. +53 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java
  92. +301 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/FilePool.java
  93. +104 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/FileQueue.java
  94. +324 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GenerateData.java
  95. +495 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java
  96. +307 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java
  97. +87 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJobSubmissionPolicy.java
  98. +258 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixKey.java
  99. +215 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixRecord.java
  100. +126 -0  src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/InputStriper.java

+ 13 - 6
.eclipse.templates/.classpath

@@ -14,12 +14,13 @@
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 	<classpathentry kind="var" path="ANT_HOME/lib/ant.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-cli-1.2.jar"/>
-	<classpathentry kind="lib" path="lib/hsqldb-1.8.0.10.jar"/>
+  <classpathentry kind="lib" path="lib/hsqldb-1.8.0.10.jar"/>
 	<classpathentry kind="lib" path="lib/kfs-0.2.2.jar"/>
   	<classpathentry kind="lib" path="lib/jsp-2.1/jsp-2.1.jar"/>
   	<classpathentry kind="lib" path="lib/jsp-2.1/jsp-api-2.1.jar"/>
-	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-codec-1.3.jar"/>
-	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-httpclient-3.0.1.jar"/>
+	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-codec-1.4.jar"/>
+  <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-daemon-1.0.1.jar" />
+  <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-httpclient-3.0.1.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/commons-el-1.0.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jasper-compiler-5.5.12.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jasper-runtime-5.5.12.jar"/>
@@ -29,18 +30,24 @@
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jets3t-0.6.1.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/junit-3.8.1.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/log4j-1.2.15.jar"/>
+	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/mockito-all-1.8.0.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/oro-2.0.8.jar"/>
   	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jetty-6.1.14.jar"/>
   	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jetty-util-6.1.14.jar"/>
   	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/servlet-api-2.5-6.1.14.jar"/>
   	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/core-3.1.1.jar"/>
 	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/slf4j-api-1.4.3.jar"/>
-	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/slf4j-log4j12-1.4.3.jar"/>
-	<classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/xmlenc-0.52.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/slf4j-log4j12-1.4.3.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/xmlenc-0.52.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jackson-mapper-asl-1.0.1.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/jackson-core-asl-1.0.1.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/aspectjrt-1.6.5.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/aspectjtools-1.6.5.jar"/>
 	<classpathentry kind="lib" path="src/test/lib/ftplet-api-1.0.0-SNAPSHOT.jar"/>
 	<classpathentry kind="lib" path="src/test/lib/ftpserver-core-1.0.0-SNAPSHOT.jar"/>
 	<classpathentry kind="lib" path="src/test/lib/ftpserver-server-1.0.0-SNAPSHOT.jar"/>
-	<classpathentry kind="lib" path="src/test/lib/mina-core-2.0.0-M2-20080407.124109-12.jar"/>
+    <classpathentry kind="lib" path="src/test/lib/mina-core-2.0.0-M2-20080407.124109-12.jar"/>
+    <classpathentry kind="lib" path="build/ivy/lib/Hadoop/common/mockito-all-1.8.0.jar"/>
 	<classpathentry kind="lib" path="build/test/classes"/>
 	<classpathentry kind="output" path="build/eclipse-classes"/>
 </classpath>

+ 8 - 0
.gitignore

@@ -19,16 +19,23 @@
 .settings
 .svn
 build/
+build.properties
+build-fi/
 conf/masters
 conf/slaves
 conf/hadoop-env.sh
 conf/hadoop-site.xml
 conf/core-site.xml
+conf/mapred-queue-acls.xml
 conf/mapred-site.xml
 conf/hdfs-site.xml
 conf/hadoop-policy.xml
 conf/capacity-scheduler.xml
 docs/api/
+ivy/ivy-*.jar
+ivy/maven-ant-tasks-*.jar
+junitvmwatch*.properties
+junit[0-9]*.properties
 logs/
 src/contrib/ec2/bin/hadoop-ec2-env.sh
 src/contrib/index/conf/index-config.xml
@@ -45,3 +52,4 @@ src/docs/build
 src/docs/cn/build
 src/docs/cn/src/documentation/sitemap.xmap
 src/docs/cn/uming.conf
+src/contrib/hdfsproxy/src/test/resources

+ 31 - 0
BUILDME.txt

@@ -0,0 +1,31 @@
+Although Hadoop is mostly written in the Java(tm) Programming Language,
+there are a number of native libraries written in C++ that need
+to be compiled.  The following build instructions are taken from
+  http://wiki.apache.org/hadoop/HowToRelease
+and describe how to build a tar file containing documentation and
+32-bit and 64-bit native libraries on Linux.  Before running the following 
+commands, you will need to setup your build machine according to
+  http://wiki.apache.org/hadoop/NativeHadoop
+
+======================================================================
+export JAVA_HOME=/path/to/32bit/jdk
+export CFLAGS=-m32
+export CXXFLAGS=-m32
+
+ant -Dversion=X.Y.Z -Dcompile.native=true -Dcompile.c++=true \
+        -Dlibhdfs=1 -Dlibrecordio=true -Dxercescroot=/usr/local/xerces-c \
+        -Declipse.home=/usr/lib/eclipse \
+        -Dforrest.home=/usr/local/forrest -Djava5.home=/usr/local/jdk1.5 \
+        clean tar
+
+export JAVA_HOME=/path/to/64bit/jdk
+export CFLAGS=-m64
+export CXXFLAGS=-m64
+
+ant -Dversion=X.Y.Z -Dcompile.native=true -Dcompile.c++=true \
+        compile-core-native compile-c++ tar
+======================================================================
+
+Once each of the two builds above is complete, you will find a tar file
+in the build directory.
+

+ 1623 - 0
CHANGES.txt

@@ -1,5 +1,1628 @@
 Hadoop Change Log
 
+Release 0.20.100 - unreleased
+
+    HADOOP-7115. Reduces the number of calls to getpwuid_r and 
+    getpwgid_r, by implementing a cache in NativeIO. (ddas)
+
+    HADOOP-6882. An XSS security exploit in jetty-6.1.14. jetty upgraded. 
+    (ddas)
+
+    MAPREDUCE-2278. Fixes a memory leak in the TaskTracker. (chrisdo)
+
+    HDFS-1353 redux. Modulate original 1353  to not bump RPC version.
+    (jhoman)
+
+    MAPREDUCE-2082 Race condition in writing the jobtoken password file when 
+    launching pipes jobs (jitendra and ddas)
+                
+    HADOOP-6978. Fixes task log servlet vulnerabilities via symlinks. 
+    (Todd Lipcon and Devaraj Das)
+
+    MAPREDUCE-2178. Write task initialization to avoid race 
+    conditions leading to privilege escalation and resource leakage by 
+    performing more actions as the user. (Owen O'Malley, Devaraj Das,
+    Chris Douglas via cdouglas)
+
+    HDFS-1364. HFTP client should support relogin from keytab
+
+    HADOOP-6907. Make RPC client to use per-proxy configuration.
+    (Kan Zhang via ddas)
+
+    MAPREDUCE-2055. Fix JobTracker to decouple job retirement from copy of 
+    job-history file to HDFS and enhance RetiredJobInfo to carry aggregated 
+    job-counters to prevent a disk roundtrip on job-completion to fetch 
+    counters for the JobClient. (Krishna Ramachandran via acmurthy)
+                
+    HDFS-1353. Remove most of getBlockLocation optimization (jghoman)
+
+    MAPREDUCE-2023. TestDFSIO read test may not read specified bytes. (htang)
+
+    HDFS-1340. A null delegation token is appended to the url if security is 
+    disabled when browsing filesystem.(boryas)
+
+    HDFS-1352. Fix jsvc.location. (jghoman)
+
+    HADOOP-6860. 'compile-fault-inject' should never be called directly. (cos)
+
+    MAPREDUCE-2005. TestDelegationTokenRenewal fails (boryas)
+
+    MAPREDUCE-2000. Rumen is not able to extract counters for Job history logs
+    from Hadoop 0.20. (htang)
+
+    MAPREDUCE-1961. ConcurrentModificationException when shutting down Gridmix.
+    (htang)
+
+    HADOOP-6899. RawLocalFileSystem set working directory does
+    not work for relative names. (suresh)
+
+    HDFS-495. New clients should be able to take over files lease if the old 
+    client died. (shv)
+
+    HADOOP-6728. Re-design and overhaul of the Metrics framework. (Luke Lu via
+    acmurthy)
+
+    MAPREDUCE-1966. Change blacklisting of tasktrackers on task failures to be
+    a simple graylist to fingerpoint bad tasktrackers. (Greg Roelofs via
+    acmurthy)
+
+    HADOOP-6864. Add ability to get netgroups (as returned by getent
+    netgroup command) using native code (JNI) instead of forking. (Erik Steffl)
+
+    HDFS-1318. HDFS Namenode and Datanode WebUI information needs to be 
+    accessible programmatically for scripts. (Tanping Wang via suresh)
+
+    HDFS-1315. Add fsck event to audit log and remove other audit log events 
+    corresponding to FSCK listStatus and open calls. (suresh)
+
+    MAPREDUCE-1941. Provides access to JobHistory file (raw) with job user/acl 
+    permission. (Srikanth Sundarrajan via ddas)
+
+    MAPREDUCE-291. Optionally a separate daemon should serve JobHistory.
+    (Srikanth Sundarrajan via ddas)
+
+    MAPREDUCE-1936. Make Gridmix3 more customizable (sync changes from trunk). (htang)
+
+    HADOOP-5981. Fix variable substitution during parsing of child environment
+    variables. (Krishna Ramachandran via acmurthy)
+
+    MAPREDUCE-339. Greedily schedule failed tasks to cause early job failure.
+    (cdouglas)
+
+    MAPREDUCE-1872. Hardened CapacityScheduler to have comprehensive, coherent
+    limits on tasks/jobs for jobs/users/queues. Also, added the ability to
+    refresh queue definitions without the need to restart the JobTracker.
+    (acmurthy)
+
+    HDFS-1161. Make DN minimum valid volumes configurable. (shv)
+
+    HDFS-457. Reintroduce volume failure tolerance for DataNodes. (shv)
+
+    HDFS-1307 Add start time, end time and total time taken for FSCK 
+    to FSCK report. (suresh)
+
+    MAPREDUCE-1207. Sanitize user environment of map/reduce tasks and allow 
+    admins to set environment and java options. (Krishna Ramachandran via
+    acmurthy) 
+
+    HDFS-1298 - Add support in HDFS for new statistics added in FileSystem
+    to track the file system operations (suresh)
+
+    HDFS-1301. TestHDFSProxy need to use server side conf for ProxyUser  
+    stuff.(boryas)
+
+    HADOOP-6859 - Introduce additional statistics to FileSystem to track 
+    file system operations (suresh)
+
+    HADOOP-6818. Provides a JNI implementation of Unix Group resolution. The 
+    config hadoop.security.group.mapping should be set to 
+    org.apache.hadoop.security.JniBasedUnixGroupsMapping to enable this
+    implementation. (ddas)
+
+    MAPREDUCE-1938. Introduces a configuration for putting user classes before
+    the system classes during job submission and in task launches. Two things
+    need to be done in order to use this feature - 
+    (1) mapreduce.user.classpath.first : this should be set to true in the 
+    jobconf, and, (2) HADOOP_USER_CLASSPATH_FIRST : this is relevant for job 
+    submissions done using bin/hadoop shell script. HADOOP_USER_CLASSPATH_FIRST
+    should be defined in the environment with some non-empty value 
+    (like "true"), and then bin/hadoop should be executed. (ddas)
+
+    HADOOP-6669. Respect compression configuration when creating DefaultCodec
+    compressors. (Koji Noguchi via cdouglas)
+
+    HADOOP-6855. Add support for netgroups, as returned by command
+    getent netgroup. (Erik Steffl)
+
+    HDFS-599. Allow NameNode to have a separate port for service requests from
+    client requests. (Dmytro Molkov via hairong)
+
+    HDFS-132. Fix namenode to not report files deleted metrics for deletions
+    done while replaying edits during startup. (shv)
+
+    MAPREDUCE-1521. Protection against incorrectly configured reduces
+    (mahadev) 
+
+    MAPREDUCE-1936. Make Gridmix3 more customizable. (htang)
+
+    MAPREDUCE-517. Enhance the CapacityScheduler to assign multiple tasks
+    per-heartbeat. (acmurthy) 
+
+    MAPREDUCE-323. Re-factor layout of JobHistory files on HDFS to improve 
+    operability. (Dick King via acmurthy) 
+
+    MAPREDUCE-1921. Ensure exceptions during reading of input data in map
+    tasks are augmented by information about actual input file which caused
+    the exception. (Krishna Ramachandran via acmurthy)  
+
+    MAPREDUCE-1118. Enhance the JobTracker web-ui to ensure tabular columns
+    are sortable, also added a /scheduler servlet to CapacityScheduler for
+    enhanced UI for queue information. (Krishna Ramachandran via acmurthy) 
+
+    HADOOP-5913. Add support for starting/stopping queues. (cdouglas)
+
+    HADOOP-6835. Add decode support for concatenated gzip files. (roelofs)
+
+    HDFS-1158. Revert HDFS-457. (shv)
+
+    MAPREDUCE-1699. Ensure JobHistory isn't disabled for any reason. (Krishna
+    Ramachandran via acmurthy)
+
+    MAPREDUCE-1682. Fix speculative execution to ensure tasks are not
+    scheduled after job failure. (acmurthy)
+
+    MAPREDUCE-1914. Ensure unique sub-directories for artifacts in the
+    DistributedCache are cleaned up. (Dick King via acmurthy)
+
+    HADOOP-6713. Multiple RPC Reader Threads (Bharathm)
+
+    HDFS-1250. Namenode should reject block reports and block received
+    requests from dead datanodes (suresh)
+
+    MAPREDUCE-1863. [Rumen] Null failedMapAttemptCDFs in job traces generated 
+    by Rumen. (htang)
+
+    MAPREDUCE-1309. Rumen refactory. (htang)
+
+    HDFS-1114. Implement LightWeightGSet for BlocksMap in order to reduce
+    NameNode memory footprint.  (szetszwo)
+
+    MAPREDUCE-572. Fixes DistributedCache.checkURIs to throw error if link is
+    missing for uri in cache archives. (amareshwari)
+
+    MAPREDUCE-787. Fix JobSubmitter to honor user given symlink in the path.
+    (amareshwari)
+
+    HADOOP-6815. refreshSuperUserGroupsConfiguration should use 
+    server side configuration for the refresh( boryas)
+
+    MAPREDUCE-1868. Add a read and connection timeout to JobClient while
+    pulling tasklogs. (Krishna Ramachandran via acmurthy)   
+
+    HDFS-1119. Introduce a GSet interface to BlocksMap.  (szetszwo)
+
+    MAPREDUCE-1778. Ensure failure to setup CompletedJobStatusStore is not
+    silently ignored by the JobTracker. (Krishna Ramachandran via acmurthy)  
+
+    MAPREDUCE-1538. Add a limit on the number of artifacts in the
+    DistributedCache to ensure we cleanup aggressively. (Dick King via
+    acmurthy) 
+
+    MAPREDUCE-1850. Add information about the host from which a job is
+    submitted. (Krishna Ramachandran via acmurthy) 
+
+    HDFS-1110. Reuses objects for commonly used file names in namenode to
+    reduce the heap usage. (suresh)
+
+    HADOOP-6810. Extract a subset of tests for smoke (DOA) validation. (cos)
+
+    HADOOP-6642. Remove debug stmt left from original patch. (cdouglas)
+
+    HADOOP-6808. Add comments on how to setup File/Ganglia Context for
+    kerberos metrics (Erik Steffl)
+
+    HDFS-1061.  INodeFile memory optimization. (bharathm)
+
+    HDFS-1109. HFTP supports filenames that contains the character "+".
+    (Dmytro Molkov via dhruba, backported by szetszwo)
+
+    HDFS-1085. Check file length and bytes read when reading a file through
+    hftp in order to detect failure.  (szetszwo)
+
+    HDFS-1311. Running tests with 'testcase' cause triple execution of the
+    same test case (cos)
+
+    HDFS-1150.FIX.  Verify datanodes' identities to clients in secure clusters.
+    Update to patch to improve handling of jsvc source in build.xml (jghoman)
+
+    HADOOP-6752. Remote cluster control functionality needs JavaDocs
+    improvement. (Balaji Rajagopalan via cos)
+
+    MAPREDUCE-1288. Fixes TrackerDistributedCacheManager to take into account
+    the owner of the localized file in the mapping from cache URIs to 
+    CacheStatus objects. (ddas)
+    
+    MAPREDUCE-1682. Fix speculative execution to ensure tasks are not
+    scheduled after job failure. (acmurthy) 
+
+    MAPREDUCE-1914. Ensure unique sub-directories for artifacts in the
+    DistributedCache are cleaned up. (Dick King via acmurthy) 
+
+    MAPREDUCE-1538. Add a limit on the number of artifacts in the
+    DistributedCache to ensure we cleanup aggressively. (Dick King via
+    acmurthy)
+
+    MAPREDUCE-1900. Fixes a FS leak that i missed in the earlier patch.
+    (ddas)
+
+    MAPREDUCE-1900. Makes JobTracker/TaskTracker close filesystems, created
+    on behalf of users, when they are no longer needed. (ddas)
+
+    HADOOP-6832. Add a static user plugin for web auth for external users.
+    (omalley)
+
+    HDFS-1007. Fixes a bug in SecurityUtil.buildDTServiceName to do
+    with handling of null hostname. (omalley)
+
+    HDFS-1007. makes long running servers using hftp work. Also has some
+    refactoring in the MR code to do with handling of delegation tokens. 
+    (omalley & ddas)
+
+    HDFS-1178. The NameNode servlets should not use RPC to connect to the
+    NameNode. (omalley)
+
+    MAPREDUCE-1807. Re-factor TestQueueManager. (Richard King via acmurthy)
+
+    HDFS-1150. Fixes the earlier patch to do logging in the right directory
+    and also adds facility for monitoring processes (via -Dprocname in the
+    command line). (Jakob Homan via ddas)
+
+    HADOOP-6781. security audit log shouldn't have exception in it. (boryas)
+
+    HADOOP-6776. Fixes the javadoc in UGI.createProxyUser. (ddas)
+
+    HDFS-1150. building jsvc from source tar. source tar is also checked in.
+    (jitendra)
+
+    HDFS-1150. Bugfix in the hadoop shell script. (ddas)
+
+    HDFS-1153. The navigation to /dfsnodelist.jsp with invalid input 
+    parameters produces NPE and HTTP 500 error (rphulari)
+    
+    MAPREDUCE-1664. Bugfix to enable queue administrators of a queue to
+    view job details of jobs submitted to that queue even though they
+    are not part of acl-view-job. 
+
+    HDFS-1150. Bugfix to add more knobs to secure datanode starter.
+
+    HDFS-1157. Modifications introduced by HDFS-1150 are breaking aspect's
+    bindings (cos)
+
+    HDFS-1130. Adds a configuration dfs.cluster.administrators for 
+    controlling access to the default servlets in hdfs. (ddas)
+
+    HADOOP-6706.FIX. Relogin behavior for RPC clients could be improved 
+    (boryas)
+
+    HDFS-1150. Verify datanodes' identities to clients in secure clusters.
+    (jghoman)
+
+    MAPREDUCE-1442. Fixed regex in job-history related to parsing Counter
+    values. (Luke Lu via acmurthy)  
+
+    HADOOP-6760. WebServer shouldn't increase port number in case of negative
+    port setting caused by Jetty's race. (cos)
+
+    HDFS-1146. Javadoc for getDelegationTokenSecretManager in FSNamesystem.
+    (jitendra)
+
+    HADOOP-6706. Fix on top of the earlier patch. Closes the connection
+    on a SASL connection failure, and retries again with a new
+    connection. (ddas)
+
+    MAPREDUCE-1716. Fix on top of earlier patch for logs truncation a.k.a 
+    MAPREDUCE-1100. Addresses log truncation issues when binary data is
+    written to log files and adds a header to a truncated log file to
+    inform users of the truncation that was done.
+
+    HDFS-1383. Improve the error messages when using hftp://.
+
+    MAPREDUCE-1744. Fixed DistributedCache apis to take a user-supplied
+    FileSystem to allow for better proxy behaviour for Oozie. (Richard King) 
+
+    MAPREDUCE-1733. Authentication between pipes processes and java counterparts.
+    (jitendra)
+
+    MAPREDUCE-1664. Bugfix on top of the previous patch. (ddas)
+
+    HDFS-1136. FileChecksumServlets.RedirectServlet doesn't carry forward 
+    the delegation token (boryas)
+
+    HADOOP-6756. Change value of FS_DEFAULT_NAME_KEY from fs.defaultFS
+    to fs.default.name which is a correct name for 0.20 (steffl)
+
+    HADOOP-6756. Document (javadoc comments) and cleanup configuration
+    keys in CommonConfigurationKeys.java (steffl)
+
+    MAPREDUCE-1759. Exception message for unauthorized user doing killJob,
+    killTask, setJobPriority needs to be improved. (gravi via vinodkv)
+
+    HADOOP-6715. AccessControlList.toString() returns empty string when
+    we set acl to "*". (gravi via vinodkv)
+
+    HADOOP-6757. NullPointerException for hadoop clients launched from
+    streaming tasks. (amarrk via vinodkv)
+
+    HADOOP-6631. FileUtil.fullyDelete() should continue to delete other files
+    despite failure at any level. (vinodkv)
+
+    MAPREDUCE-1317. NPE in setHostName in Rumen. (rksingh)
+
+    MAPREDUCE-1754. Replace mapred.persmissions.supergroup with an acl : 
+    mapreduce.cluster.administrators and HADOOP-6748.: Remove
+    hadoop.cluster.administrators. Contributed by Amareshwari Sriramadasu.
+
+    HADOOP-6701.  Incorrect exit codes for "dfs -chown", "dfs -chgrp"
+    (rphulari)
+
+    HADOOP-6640. FileSystem.get() does RPC retires within a static
+    synchronized block. (hairong)
+
+    HDFS-1006. Removes unnecessary logins from the previous patch. (ddas)
+
+    HADOOP-6745. adding some java doc to Server.RpcMetrics, UGI (boryas)
+
+    MAPREDUCE-1707. TaskRunner can get NPE in getting ugi from TaskTracker. (vinodkv)
+
+    HDFS-1104. Fsck triggers full GC on NameNode. (hairong)
+
+    HADOOP-6332. Large-scale Automated Test Framework (sharad, Sreekanth
+    Ramakrishnan, at all via cos)
+
+    HADOOP-6526. Additional fix for test context on top of existing one. (cos)
+
+    HADOOP-6710. Symbolic umask for file creation is not conformant with posix.
+    (suresh)
+
+    HADOOP-6693. Added metrics to track kerberos login success and failure.
+    (suresh)
+
+    MAPREDUCE-1711. Gridmix should provide an option to submit jobs to the same
+    queues as specified in the trace. (rksing via htang)
+
+    MAPREDUCE-1687. Stress submission policy does not always stress the
+    cluster. (htang)
+
+    MAPREDUCE-1641. Bug-fix to ensure command line options such as
+    -files/-archives are checked for duplicate artifacts in the
+    DistributedCache. (Amareshwari Sreeramadasu via acmurthy) 
+
+    MAPREDUCE-1641. Fix DistributedCache to ensure same files cannot be put in
+    both the archives and files sections. (Richard King via acmurthy) 
+
+    HADOOP-6670. Fixes a testcase issue introduced by the earlier commit
+    of the HADOOP-6670 patch. (ddas)
+
+    MAPREDUCE-1718. Fixes a problem to do with correctly constructing
+    service name for the delegation token lookup in HftpFileSystem
+    (borya via ddas)
+
+    HADOOP-6674. Fixes the earlier patch to handle pings correctly (ddas).
+
+    MAPREDUCE-1664. Job Acls affect when Queue Acls are set. 
+    (Ravi Gummadi via vinodkv)
+
+    HADOOP-6718. Fixes a problem to do with clients not closing RPC
+    connections on a SASL failure. (ddas)
+
+    MAPREDUCE-1397. NullPointerException observed during task failures.
+    (Amareshwari Sriramadasu via vinodkv)
+
+    HADOOP-6670. Use the UserGroupInformation's Subject as the criteria for
+    equals and hashCode. (omalley)
+
+    HADOOP-6716. System won't start in non-secure mode when kerb5.conf 
+   (edu.mit.kerberos on Mac) is not present. (boryas)
+
+    MAPREDUCE-1607. Task controller may not set permissions for a 
+    task cleanup attempt's log directory. (Amareshwari Sreeramadasu via 
+    vinodkv)
+
+    MAPREDUCE-1533. JobTracker performance enhancements. (Amar Kamat via 
+    vinodkv)
+
+    MAPREDUCE-1701.  AccessControlException while renewing a delegation token 
+    is not correctly handled in the JobTracker. (boryas)
+
+    HDFS-481. Incremental patch to fix broken unit test in contrib/hdfsproxy
+
+    HADOOP-6706. Fixes a bug in the earlier version of the same patch (ddas)
+
+    HDFS-1096. allow dfsadmin/mradmin refresh of superuser proxy group 
+    mappings(boryas).
+
+    HDFS-1012. Support for cluster specific path entries in ldap for hdfsproxy
+    (Srikanth Sundarrajan via Nicholas)
+
+    HDFS-1011. Improve Logging in HDFSProxy to include cluster name associated
+    with the request (Srikanth Sundarrajan via Nicholas)
+
+    HDFS-1010. Retrieve group information from UnixUserGroupInformation 
+    instead of LdapEntry (Srikanth Sundarrajan via Nicholas)
+
+    HDFS-481. Bug fix - hdfsproxy: Stack overflow + Race conditions
+    (Srikanth Sundarrajan via Nicholas)
+
+    MAPREDUCE-1657. After task logs directory is deleted, tasklog servlet
+    displays wrong error message about job ACLs. (Ravi Gummadi via vinodkv)
+
+    MAPREDUCE-1692. Remove TestStreamedMerge from the streaming tests.
+    (Amareshwari Sriramadasu and Sreekanth Ramakrishnan via vinodkv)
+
+    HDFS-1081. Performance regression in 
+    DistributedFileSystem::getFileBlockLocations in secure systems (jhoman)
+    
+    MAPREDUCE-1656. JobStory should provide queue info. (htang)
+
+    MAPREDUCE-1317. Reducing memory consumption of rumen objects. (htang)
+
+    MAPREDUCE-1317. Reverting the patch since it caused build failures. (htang)
+
+    MAPREDUCE-1683. Fixed jobtracker web-ui to correctly display heap-usage.
+    (acmurthy)
+    
+    HADOOP-6706. Fixes exception handling for saslConnect. The ideal
+    solution is to use the Refreshable interface but as Owen noted in
+    HADOOP-6656, it doesn't seem to work as expected. (ddas)
+
+    MAPREDUCE-1617. TestBadRecords failed once in our test runs. (Amar
+    Kamat via vinodkv).
+
+    MAPREDUCE-587. Stream test TestStreamingExitStatus fails with Out of
+    Memory. (Amar Kamat via vinodkv).
+
+    HDFS-1096. Reverting the patch since it caused build failures. (ddas)
+
+    MAPREDUCE-1317. Reducing memory consumption of rumen objects. (htang)
+
+    MAPREDUCE-1680. Add a metric to track number of heartbeats processed by the
+    JobTracker. (Richard King via acmurthy)
+
+    MAPREDUCE-1683.  Removes JNI calls to get jvm current/max heap usage in
+    ClusterStatus by default. (acmurthy)
+
+    HADOOP-6687.  user object in the subject in UGI should be reused in case 
+    of a relogin. (jitendra)
+
+    HADOOP-5647. TestJobHistory fails if /tmp/_logs is not writable to. 
+    Testcase should not depend on /tmp. (Ravi Gummadi via vinodkv)
+
+    MAPREDUCE-181. Bug fix for Secure job submission. (Ravi Gummadi via 
+    vinodkv)
+
+    MAPREDUCE-1635. ResourceEstimator does not work after MAPREDUCE-842. 
+    (Amareshwari Sriramadasu via vinodkv)
+
+    MAPREDUCE-1526. Cache the job related information while submitting the 
+    job. (rksingh)
+
+    HADOOP-6674. Turn off SASL checksums for RPCs. (jitendra via omalley)
+
+    HADOOP-5958. Replace fork of DF with library call. (cdouglas via omalley)
+
+    HDFS-999.  Secondary namenode should login using kerberos if security
+    is configured. Bugfix to original patch. (jhoman)
+
+    MAPREDUCE-1594. Support for SleepJobs in Gridmix (rksingh)
+
+    HDFS-1007. Fix. ServiceName for delegation token for Hftp has hftp
+    port and not RPC port. 
+    
+    MAPREDUCE-1376. Support for varied user submissions in Gridmix (rksingh)
+
+    HDFS-1080.  SecondaryNameNode image transfer should use the defined 
+    http address rather than local ip address (jhoman)
+    
+    HADOOP-6661. User document for UserGroupInformation.doAs for secure
+    impersonation. (jitendra)
+
+    MAPREDUCE-1624. Documents the job credentials and associated details 
+    to do with delegation tokens (ddas)
+    HDFS-1036. Documentation for fetchdt for forrest (boryas)
+    HDFS-1039. New patch on top of previous patch. Gets namenode address
+    from conf. (jitendra)
+
+    HADOOP-6656. Renew Kerberos TGT when 80% of the renew lifetime has been
+    used up. (omalley)
+
+    HADOOP-6653. Protect against NPE in setupSaslConnection when real user is
+    null. (omalley)
+
+    HADOOP-6649. An error in the previous committed patch. (jitendra)
+
+    HADOOP-6652. ShellBasedUnixGroupsMapping shouldn't have a cache. 
+    (ddas)
+
+    HADOOP-6649. login object in UGI should be inside the subject
+    (jitendra)
+
+    HADOOP-6637.  Benchmark overhead of RPC session establishment
+    (shv via jitendra)
+
+    HADOOP-6648. Credentials must ignore null tokens that can be generated
+    when using HFTP to talk to insecure clusters. (omalley)
+
+    HADOOP-6632. Fix on JobTracker to reuse filesystem handles if possible.
+    (ddas)
+
+    HADOOP-6647. balancer fails with "is not authorized for protocol 
+    interface NamenodeProtocol" in secure environment (boryas)
+
+    MAPREDUCE-1612. job conf file is not accessible from job history 
+    web page. (Ravi Gummadi via vinodkv)
+
+    MAPREDUCE-1611. Refresh nodes and refresh queues doesnt work with 
+    service authorization enabled. (Amar Kamat via vinodkv)
+
+    HADOOP-6644. util.Shell getGROUPS_FOR_USER_COMMAND method 
+   name - should use common naming convention (boryas)
+
+    MAPREDUCE-1609. Fixes a problem with localization of job log
+    directories when tasktracker is re-initialized that can result
+    in failed tasks. (Amareshwari Sriramadasu via yhemanth)
+
+    MAPREDUCE-1610. Update forrest documentation for directory
+    structure of localized files. (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-1532. Fixes a javadoc and an exception message in JobInProgress
+    when the authenticated user is different from the user in conf. (ddas)
+
+    MAPREDUCE-1417. Update forrest documentation for private
+    and public distributed cache files. (Ravi Gummadi via yhemanth)
+
+    HADOOP-6634. AccessControlList uses full-principal names to verify acls
+    causing queue-acls to fail (vinodkv)
+    
+    HADOOP-6642. Fix javac, javadoc, findbugs warnings. (chrisdo via acmurthy) 
+
+    HDFS-1044.  Cannot submit mapreduce job from secure client to 
+    unsecure server. (boryas)
+    HADOOP-6638. try to relogin in a case of failed RPC connection 
+    (expired tgt) only in case the subject is loginUser or  
+    proxyUgi.realUser. (boryas)
+
+    HADOOP-6632. Support for using different Kerberos keys for different 
+    instances of Hadoop services. (jitendra)
+
+    HADOOP-6526. Need mapping from long principal names to local OS 
+    user names. (jitendra)
+
+    MAPREDUCE-1604. Update Forrest documentation for job authorization
+    ACLs. (Amareshwari Sriramadasu via yhemanth)
+
+    HDFS-1045.  In secure clusters, re-login is necessary for https 
+    clients before opening connections (jhoman)
+
+    HADOOP-6603.  Addition to original patch to be explicit
+    about new method not being for general use. (jhoman)
+
+    MAPREDUCE-1543. Add audit log messages for job and queue
+    access control checks. (Amar Kamat via yhemanth)
+
+    MAPREDUCE-1606. Fixed occasional timeout in TestJobACL. (Ravi Gummadi via
+    acmurthy)
+
+   HADOOP-6633. normalize property names for JT/NN kerberos principal 
+    names in configuration. (boryas)
+
+    HADOOP-6613. Changes the RPC server so that version is checked first
+    on an incoming connection. (Kan Zhang via ddas)
+
+    HADOOP-5592. Fix typo in Streaming doc in reference to GzipCodec.
+    (Corinne Chandel via tomwhite)
+
+    MAPREDUCE-813. Updates Streaming and M/R tutorial documents.
+    (Corinne Chandel via ddas)
+
+    MAPREDUCE-927. Cleanup of task-logs should happen in TaskTracker instead
+    of the Child. (Amareshwari Sriramadasu via vinodkv)
+
+    HDFS-1039. Service should be set in the token in JspHelper.getUGI.
+    (jitendra)
+
+    MAPREDUCE-1599. MRBench reuses jobConf and credentials there in.
+    (jitendra)
+
+    MAPREDUCE-1522. FileInputFormat may use the default FileSystem for the
+    input path. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HDFS-1036. In DelegationTokenFetch pass Configuration object so getDefaultUri
+    will work correctly.
+
+    HDFS-1038. In nn_browsedfscontent.jsp fetch delegation token only if 
+    security is enabled. (jitendra)
+
+    HDFS-1036. in DelegationTokenFetch dfs.getURI returns no port (boryas)
+
+    HADOOP-6598. Verbose logging from the Group class (one more case)
+    (boryas)
+
+    HADOOP-6627. "Bad Connection to FS" message in FSShell should print
+    message from the exception (boryas)
+
+    HDFS-1033. In secure clusters, NN and SNN should verify that the remote 
+    principal during image and edits transfer (jhoman)
+
+    HDFS-1005. Fixes a bug to do with calling the cross-realm API in Fsck
+    client. (ddas)
+
+    MAPREDUCE-1422. Fix cleanup of localized job directory to work if files
+    with non-deletable permissions are created within it.
+    (Amar Kamat via yhemanth)
+
+    HDFS-1007. Fixes bugs to do with 20S cluster talking to 20 over 
+    hftp (borya)
+
+    MAPREDUCE-1566. Fixes bugs in the earlier patch. (ddas)
+
+    HDFS-992. A bug in backport for HDFS-992. (jitendra)
+
+    HADOOP-6598. Remove verbose logging from the Groups class. (borya)
+    HADOOP-6620. NPE if renewer is passed as null in getDelegationToken.
+    (jitendra)
+
+    HDFS-1023. Second Update to original patch to fix username (jhoman)
+
+    MAPREDUCE-1435. Add test cases to already committed patch for this
+    jira, synchronizing changes with trunk. (yhemanth)
+
+    HADOOP-6612. Protocols RefreshUserToGroupMappingsProtocol and 
+    RefreshAuthorizationPolicyProtocol  authorization settings thru 
+    KerberosInfo (boryas)
+
+    MAPREDUCE-1566. Bugfix for tests on top of the earlier patch. (ddas)
+
+    MAPREDUCE-1566. Mechanism to import tokens and secrets from a file in to
+    the submitted job. (omalley)
+
+    HADOOP-6603. Provide workaround for issue with Kerberos not
+    resolving cross-realm principal. (kan via jhoman)
+
+    HDFS-1023. Update to original patch to fix username (jhoman)
+
+    HDFS-814. Add an api to get the visible length of a 
+    DFSDataInputStream. (hairong)
+
+    HDFS-1023. Allow http server to start as regular user if https
+    principal is not defined. (jhoman)
+
+    HDFS-1022. Merge all three test specs files (common, hdfs, mapred)
+    into one. (steffl)
+
+    HDFS-101. DFS write pipeline: DFSClient sometimes does not detect
+    second datanode failure. (hairong)
+
+    HDFS-1015. Intermittent failure in TestSecurityTokenEditLog. (jitendra)
+
+    MAPREDUCE-1550. A bugfix on top of what was committed earlier (ddas).
+
+    MAPREDUCE-1155. DISABLING THE TestStreamingExitStatus temporarily. (ddas)
+
+    HDFS-1020. Changes the check for renewer from short name to long name
+    in the cancel/renew delegation token methods. (jitendra via ddas)
+
+    HDFS-1019. Fixes values of delegation token parameters in
+    hdfs-default.xml. (jitendra via ddas)
+
+    MAPREDUCE-1430. Fixes a backport issue with the earlier patch. (ddas)
+
+    MAPREDUCE-1559. Fixes a problem in DelegationTokenRenewal class to
+    do with using the right credentials when talking to the NameNode.(ddas)
+
+    MAPREDUCE-1550. Fixes a problem to do with creating a filesystem using
+    the user's UGI in the JobHistory browsing. (ddas)
+
+    HADOOP-6609. Fix UTF8 to use a thread local DataOutputBuffer instead of
+    a static that was causing a deadlock in RPC. (omalley)
+
+    HADOOP-6584. Fix javadoc warnings introduced by original HADOOP-6584
+    patch (jhoman)
+    
+    HDFS-1017. browsedfs jsp should call JspHelper.getUGI rather than using 
+    createRemoteUser(). (jhoman)
+
+    MAPREDUCE-899. Modified LinuxTaskController to check that task-controller
+    has right permissions and ownership before performing any actions.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HDFS-204. Revive number of files listed metrics. (hairong)
+
+    HADOOP-6569. FsShell#cat should avoid calling unnecessary getFileStatus
+    before opening a file to read. (hairong)
+
+    HDFS-1014. Error in reading delegation tokens from edit logs. (jitendra)
+
+    HDFS-458. Add under-10-min tests from 0.22 to 0.20.1xx, only the tests
+    that already exist in 0.20.1xx (steffl)
+
+    MAPREDUCE-1155. Just pulls out the TestStreamingExitStatus part of the
+    patch from jira (that went to 0.22). (ddas)
+ 
+    HADOOP-6600. Fix for branch backport only. Comparing of user should use
+    equals. (boryas).
+
+    HDFS-1006. Fixes NameNode and SecondaryNameNode to use kerberizedSSL for
+    the http communication. (Jakob Homan via ddas)
+
+    HDFS-1007. Fixes a bug on top of the earlier patch. (ddas)
+
+    HDFS-1005. Fsck security. Makes it work over kerberized SSL (boryas and 
+    jhoman)
+
+    HDFS-1007. Makes HFTP and Distcp use kerberized SSL. (ddas)
+
+    MAPREDUCE-1455. Fixes a testcase in the earlier patch. 
+    (Ravi Gummadi via ddas)
+
+    HDFS-992. Refactors block access token implementation to conform to the 
+    generic Token interface. (Kan Zhang via ddas)
+
+    HADOOP-6584. Adds KrbSSL connector for jetty. (Jakob Homan via ddas)
+
+    HADOOP-6589. Add a framework for better error messages when rpc connections
+    fail to authenticate. (Kan Zhang via omalley)
+
+    HADOOP-6600,HDFS-1003,MAPREDUCE-1539. mechanism for authorization check
+    for inter-server protocols(boryas)
+
+    HADOOP-6580,HDFS-993,MAPREDUCE-1516. UGI should contain authentication 
+    method.
+
+    Namenode and JT should issue a delegation token only for kerberos 
+    authenticated  clients. (jitendra)
+
+    HDFS-984,HADOOP-6573,MAPREDUCE-1537. Delegation Tokens should be persisted
+    in Namenode, and corresponding changes in common and mr. (jitendra)
+
+    HDFS-994. Provide methods for obtaining delegation token from Namenode for 
+    hftp and other uses. Incorporates HADOOP-6594: Update hdfs script to 
+    provide fetchdt tool. (jitendra)
+
+    HADOOP-6586. Log authentication and authorization failures and successes
+    (boryas)
+
+    HDFS-991. Allow use of delegation tokens to authenticate to the 
+    HDFS servlets. (omalley)
+
+    HADOOP-1849. Add undocumented configuration parameter for per handler 
+    call queue size in IPC Server. (shv)
+    
+    HADOOP-6599. Split existing RpcMetrics with summary in RpcMetrics and
+    details information in RpcDetailedMetrics. (suresh)
+
+    HDFS-985. HDFS should issue multiple RPCs for listing a large directory.
+    (hairong)
+
+    HDFS-1000. Updates libhdfs to use the new UGI. (ddas)
+
+    MAPREDUCE-1532. Ensures all filesystem operations at the client is done
+    as the job submitter. Also, changes the renewal to maintain list of tokens
+    to renew. (ddas)
+
+    HADOOP-6596. Add a version field to the serialization of the
+    AbstractDelegationTokenIdentifier. (omalley)
+
+    HADOOP-5561. Add javadoc.maxmemory to build.xml to allow larger memory.
+    (jkhoman via omalley)
+
+    HADOOP-6579. Add a mechanism for encoding and decoding Tokens in to
+    url-safe strings. (omalley)
+
+    MAPREDUCE-1354. Make incremental changes in jobtracker for
+    improving scalability (acmurthy)
+
+    HDFS-999. Secondary namenode should login using kerberos if security
+    is configured. (boryas)
+
+    MAPREDUCE-1466. Added a private configuration variable 
+    mapreduce.input.num.files, to store number of input files 
+    being processed by M/R job. (Arun Murthy via yhemanth)
+
+    MAPREDUCE-1403. Save file-sizes of each of the artifacts in 
+    DistributedCache in the JobConf (Arun Murthy via yhemanth)
+
+    HADOOP-6543. Fixes a compilation problem in the original commit. (ddas)
+
+    MAPREDUCE-1520. Moves a call to setWorkingDirectory in Child to within
+    a doAs block. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-6543. Allows secure clients to talk to unsecure clusters. 
+    (Kan Zhang via ddas)
+
+    MAPREDUCE-1505. Delays construction of the job client until it is really
+    required. (Arun C Murthy via ddas)
+
+    HADOOP-6549. TestDoAsEffectiveUser should use ip address of the host
+    for superuser ip check. (jitendra)
+
+    HDFS-464. Fix memory leaks in libhdfs. (Christian Kunz via suresh)
+
+    HDFS-946. NameNode should not return full path name when listing a
+    directory or getting the status of a file. (hairong)
+
+    MAPREDUCE-1398. Fix TaskLauncher to stop waiting for slots on a TIP 
+    that is killed / failed. (Amareshwari Sriramadasu via yhemanth)
+
+    MAPREDUCE-1476. Fix the M/R framework to not call commit for special 
+    tasks like job setup/cleanup and task cleanup.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6467.  Performance improvement for liststatus on directories in
+    hadoop archives. (mahadev)
+
+    HADOOP-6558. archive does not work with distcp -update. (nicholas via
+    mahadev)
+
+    HADOOP-6583. Captures authentication and authorization metrics. (ddas)
+
+    MAPREDUCE-1316. Fixes a memory leak of TaskInProgress instances in
+    the jobtracker. (Amar Kamat via yhemanth)
+
+    MAPREDUCE-670. Creates ant target for 10 mins patch test build.
+    (Jothi Padmanabhan via gkesavan)
+
+    MAPREDUCE-1430. JobTracker should be able to renew delegation tokens 
+    for the jobs(boryas)
+
+    HADOOP-6551, HDFS-986, MAPREDUCE-1503. Change API for tokens to throw
+    exceptions instead of returning booleans. (omalley)
+
+    HADOOP-6545. Changes the Key for the FileSystem to be UGI. (ddas)
+
+    HADOOP-6572. Makes sure that SASL encryption and push to responder queue 
+    for the RPC response happens atomically. (Kan Zhang via ddas)
+
+    HDFS-965. Split the HDFS TestDelegationToken into two tests, of which
+    one proxy users and the other normal users. (jitendra via omalley)
+
+    HADOOP-6560. HarFileSystem throws NPE for har://hdfs-/foo (nicholas via
+    mahadev)
+
+    MAPREDUCE-686. Move TestSpeculativeExecution.Fake* into a separate class
+    so that it can be used by other tests. (Jothi Padmanabhan via sharad)
+
+    MAPREDUCE-181. Fixes an issue in the use of the right config. (ddas)
+
+    MAPREDUCE-1026. Fixes a bug in the backport. (ddas)
+
+    HADOOP-6559. Makes the RPC client automatically re-login when the SASL 
+    connection setup fails. This is applicable to only keytab based logins. 
+    (ddas)
+
+    HADOOP-2141. Backport changes made in the original JIRA to aid
+    fast unit tests in Map/Reduce. (Amar Kamat via yhemanth)
+
+    HADOOP-6382.  Import the mavenizable pom file structure and adjust
+    the build targets and bin scripts. (gkesvan via ltucker)
+
+    MAPREDUCE-1425. archive throws OutOfMemoryError (mahadev) 
+
+    MAPREDUCE-1399. The archive command shows a null error message. (nicholas)
+
+    HADOOP-6552. Puts renewTGT=true and useTicketCache=true for the keytab 
+    kerberos options. (ddas)
+
+    MAPREDUCE-1433. Adds delegation token for MapReduce (ddas)
+
+    HADOOP-4359. Fixes a bug in the earlier backport. (ddas)
+
+    HADOOP-6547, HDFS-949, MAPREDUCE-1470. Move Delegation token into Common 
+    so that we can use it for MapReduce also. It is a combined patch for 
+    common, hdfs and mr. (jitendra)
+
+    HADOOP-6510,HDFS-935,MAPREDUCE-1464. Support for doAs to allow 
+    authenticated superuser to impersonate proxy users. It is a combined 
+    patch with compatible fixes in HDFS and MR. (jitendra)
+
+    MAPREDUCE-1435. Fixes the way symlinks are handled when cleaning up
+    work directory files. (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-6419. Fixes a bug in the backported patch. (ddas)
+
+    MAPREDUCE-1457. Fixes JobTracker to get the FileSystem object within 
+    getStagingAreaDir within a privileged block. Fixes Child.java to use the
+    appropriate UGIs while getting the TaskUmbilicalProtocol proxy and while
+    executing the task. Contributed by Jakob Homan. (ddas)
+
+    MAPREDUCE-1440. Replace the long user name in MapReduce with the local
+    name. (ddas) 
+
+    HADOOP-6419. Adds SASL based authentication to RPC. Also includes the 
+    MAPREDUCE-1335 and HDFS-933 patches. Contributed by Kan Zhang. 
+    (ddas)
+ 
+    HADOOP-6538. Sets hadoop.security.authentication to simple by default.
+    (ddas)
+
+    HDFS-938.  Replace calls to UGI.getUserName() with 
+    UGI.getShortUserName()(boryas)
+
+    HADOOP-6544. fix ivy settings to include JSON jackson.codehause.org
+     libs for .20 (boryas)
+     
+    HDFS-907. Add tests for getBlockLocations and totalLoad metrics. (rphulari)
+
+    HADOOP-6204. Implementing aspects development and fault injection
+    framework for Hadoop (cos)
+
+    MAPREDUCE-1432. Adds hooks in the jobtracker and tasktracker
+    for loading the tokens in the user's ugi. This is required for
+    the copying of files from the hdfs. (Devaraj Das via boryas)
+
+    MAPREDUCE-1383. Automates fetching of delegation tokens in File*Formats
+    Distributed Cache and Distcp. Also, provides a config 
+    mapreduce.job.hdfs-servers that the jobs can populate with a comma 
+    separated list of namenodes. The job client automatically fetches 
+    delegation tokens from those namenodes.
+
+    HADOOP-6337. Update FilterInitializer class to be more visible
+    and take a conf for further development. (jhoman)
+
+    HADOOP-6520. UGI should load tokens from the environment. (jitendra)
+
+    HADOOP-6517, HADOOP-6518. Ability to add/get tokens from 
+    UserGroupInformation & Kerberos login in UGI should honor KRB5CCNAME
+    (jitendra)
+
+    HADOOP-6299. Reimplement the UserGroupInformation to use the OS
+    specific and Kerberos JAAS login. (jhoman, ddas, oom)
+    
+    HADOOP-6524. Contrib tests are failing Clover'ed build. (cos)
+
+    MAPREDUCE-842. Fixing a bug in the earlier version of the patch
+    related to improper localization of the job token file.
+    (Ravi Gummadi via yhemanth)
+
+    HDFS-919. Create test to validate the BlocksVerified metric (Gary Murry
+    via cos)
+
+    MAPREDUCE-1186. Modified code in distributed cache to set 
+    permissions only on required set of localized paths.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HDFS-899. Delegation Token Implementation. (Jitendra Nath Pandey)
+
+    MAPREDUCE-896. Enhance tasktracker to cleanup files that might have 
+    been created by user tasks with non-writable permissions. 
+    (Ravi Gummadi via yhemanth)
+
+    HADOOP-5879. Read compression level and strategy from Configuration for
+    gzip compression. (He Yongqiang via cdouglas)
+
+    HADOOP-6161. Add get/setEnum methods to Configuration. (cdouglas)
+
+    HADOOP-6382 Mavenize the build.xml targets and update the bin scripts
+    in preparation for publishing POM files (giri kesavan via ltucker)
+
+    HDFS-737. Add full path name of the file to the block information and 
+    summary of total number of files, blocks, live and deadnodes to 
+    metasave output. (Jitendra Nath Pandey via suresh)
+
+    HADOOP-6577. Add hidden configuration option "ipc.server.max.response.size"
+    to change the default 1 MB, the maximum size when large IPC handler 
+    response buffer is reset. (suresh)
+
+    HADOOP-6521. Fix backward compatibility issue with umask when applications
+    use deprecated param dfs.umask in configuration or use 
+    FsPermission.setUMask(). (suresh)
+
+    HDFS-737. Add full path name of the file to the block information and 
+    summary of total number of files, blocks, live and deadnodes to 
+    metasave output. (Jitendra Nath Pandey via suresh)
+
+    HADOOP-6521. Fix backward compatibility issue with umask when applications
+    use deprecated param dfs.umask in configuration or use 
+    FsPermission.setUMask(). (suresh)
+
+    MAPREDUCE-433. Use more reliable counters in TestReduceFetch.
+    (Christopher Douglas via ddas)
+
+    MAPREDUCE-744. Introduces the notion of a public distributed cache.
+    (ddas)
+
+    MAPREDUCE-1140. Fix DistributedCache to not decrement reference counts 
+    for unreferenced files in error conditions.    
+    (Amareshwari Sriramadasu via yhemanth)
+
+    MAPREDUCE-1284. Fix fts_open() call in task-controller that was failing 
+    LinuxTaskController unit tests. (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-1098. Fixed the distributed-cache to not do i/o while 
+    holding a global lock.
+    (Amareshwari Sriramadasu via acmurthy)
+
+    MAPREDUCE-1338. Introduces the notion of token cache using which 
+    tokens and secrets can be sent by the Job client to the JobTracker.
+    (Boris Shkolnik)
+
+    HADOOP-6495. Identifier should be serialized after the password is created
+    In Token constructor. (Jitendra Nath Pandey)
+
+    HADOOP-6506. Failing tests prevent the rest of test targets from
+    execution. (cos)
+
+    HADOOP-5457. Fix to continue to run builds even if contrib test fails.
+    (gkesavan)
+
+    MAPREDUCE-856. Setup secure permissions for distributed cache files.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    MAPREDUCE-871. Fix ownership of Job/Task local files to have correct 
+    group ownership according to the egid of the tasktracker.
+    (Vinod Kumar Vavilapalli via yhemanth)
+    
+    MAPREDUCE-476. Extend DistributedCache to work locally (LocalJobRunner). 
+    (Philip Zeyliger via tomwhite)
+
+    MAPREDUCE-711. Removed Distributed Cache from Common, to move it under 
+    Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    MAPREDUCE-478. Allow map and reduce jvm parameters, environment 
+    variables and ulimit to be set separately. (acmurthy)
+ 
+    MAPREDUCE-842. Setup secure permissions for localized job files, 
+    intermediate outputs and log files on tasktrackers.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    MAPREDUCE-408. Fixes an assertion problem in TestKillSubProcesses.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-4041. IsolationRunner does not work as documented.
+    (Philip Zeyliger via tomwhite)
+
+    MAPREDUCE-181. Changes the job submission process to be secure.
+    (Devaraj Das)
+
+    HADOOP-5737. Fixes a problem in the way the JobTracker used to talk to 
+    other daemons like the NameNode to get the job's files. Also adds APIs 
+    in the JobTracker to get the FileSystem objects as per the JobTracker's 
+    configuration. (Amar Kamat via ddas)
+ 
+    HADOOP-5771. Implements unit tests for LinuxTaskController.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4656, HDFS-685, MAPREDUCE-1083. Use the user-to-groups mapping 
+    service in the NameNode and JobTracker. Combined patch for these 3 jiras 
+    otherwise tests fail. (Jitendra Nath Pandey)
+
+    MAPREDUCE-1250. Refactor job token to use a common token interface.
+    (Jitendra Nath Pandey)
+
+    MAPREDUCE-1026. Shuffle should be secure. (Jitendra Nath Pandey)
+
+    HADOOP-4268. Permission checking in fsck. (Jitendra Nath Pandey)
+
+    HADOOP-6415. Adding a common token interface for both job token and 
+    delegation token. (Jitendra Nath Pandey)
+
+    HADOOP-6367, HDFS-764. Moving Access Token implementation from Common to 
+    HDFS. These two jiras must be committed together otherwise build will
+    fail. (Jitendra Nath Pandey)
+
+    HDFS-409. Add more access token tests
+    (Jitendra Nath Pandey)
+
+    HADOOP-6132. RPC client opens an extra connection for VersionedProtocol.
+    (Jitendra Nath Pandey)
+
+    HDFS-445. pread() fails when cached block locations are no longer valid.
+    (Jitendra Nath Pandey)
+
+    HDFS-195. Need to handle access token expiration when re-establishing the 
+    pipeline for dfs write. (Jitendra Nath Pandey)
+
+    HADOOP-6176. Adding a couple private methods to AccessTokenHandler 
+    for testing purposes. (Jitendra Nath Pandey)
+
+    HADOOP-5824. remove OP_READ_METADATA functionality from Datanode.
+    (Jitendra Nath Pandey)
+
+    HADOOP-4359. Access Token: Support for data access authorization 
+    checking on DataNodes. (Jitendra Nath Pandey)
+
+    MAPREDUCE-1372. Fixed a ConcurrentModificationException in jobtracker.
+    (Arun C Murthy via yhemanth)
+
+    MAPREDUCE-1316. Fix jobs' retirement from the JobTracker to prevent memory
+    leaks via stale references. (Amar Kamat via acmurthy)  
+
+    MAPREDUCE-1342. Fixed deadlock in global blacklisting of tasktrackers. 
+    (Amareshwari Sriramadasu via acmurthy)  
+
+    HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
+    server on exceeding maximum response size to free up Java heap. (suresh)
+
+    MAPREDUCE-1100. Truncate user logs to prevent TaskTrackers' disks from
+    filling up. (Vinod Kumar Vavilapalli via acmurthy) 
+
+    MAPREDUCE-1143. Fix running task counters to be updated correctly
+    when speculative attempts are running for a TIP.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-6151, 6281, 6285, 6441. Add HTML quoting of the parameters to all
+    of the servlets to prevent XSS attacks. (omalley)
+
+    MAPREDUCE-896. Fix bug in earlier implementation to prevent
+    spurious logging in tasktracker logs for absent file paths.
+    (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-676. Fix Hadoop Vaidya to ensure it works for map-only jobs. 
+    (Suhas Gogate via acmurthy) 
+
+    HADOOP-5582. Fix Hadoop Vaidya to use new Counters in
+    org.apache.hadoop.mapreduce package. (Suhas Gogate via acmurthy) 
+
+    HDFS-595.  umask settings in configuration may now use octal or 
+    symbolic instead of decimal.  Update HDFS tests as such.  (jghoman)
+
+    MAPREDUCE-1068. Added a verbose error message when user specifies an
+    incorrect -file parameter. (Amareshwari Sriramadasu via acmurthy)  
+
+    MAPREDUCE-1171. Allow the read-error notification in shuffle to be
+    configurable. (Amareshwari Sriramadasu via acmurthy) 
+
+    MAPREDUCE-353. Allow shuffle read and connection timeouts to be
+    configurable. (Amareshwari Sriramadasu via acmurthy) 
+
+    HDFS-781. Namenode metrics PendingDeletionBlocks is not decremented. 
+    (suresh)
+ 
+    MAPREDUCE-1185. Redirect running job url to history url if job is already 
+    retired. (Amareshwari Sriramadasu and Sharad Agarwal via sharad)
+
+    MAPREDUCE-754. Fix NPE in expiry thread when a TT is lost. (Amar Kamat 
+    via sharad)
+
+    MAPREDUCE-896. Modify permissions for local files on tasktracker before
+    deletion so they can be deleted cleanly. (Ravi Gummadi via yhemanth)
+ 
+    MAPREDUCE-1124. Import Gridmix3 and Rumen. (cdouglas)
+
+    MAPREDUCE-1063. Document gridmix benchmark. (cdouglas)
+
+    HDFS-758. Changes to report status of decommissioning on the namenode web
+    UI. (jitendra)
+
+    HADOOP-6234. Add new option dfs.umaskmode to set umask in configuration
+    to use octal or symbolic instead of decimal. (Jakob Homan via suresh)
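+
+    As an illustrative sketch only (the target path is hypothetical, and it
+    assumes FsShell accepts the generic -D option), the new key can be given
+    an octal value on a per-command basis:
+
+      bin/hadoop fs -Ddfs.umaskmode=027 -mkdir /tmp/umask-example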
+
+    MAPREDUCE-1147. Add map output counters to new API. (Amar Kamat via
+    cdouglas)
+
+    MAPREDUCE-1182. Fix overflow in reduce causing allocations to exceed the
+    configured threshold. (cdouglas)
+
+    HADOOP-4933. Fixes a ConcurrentModificationException problem that shows up
+    when the history viewer is accessed concurrently.
+    (Amar Kamat via ddas)
+
+    HADOOP-6203. FsShell rm/rmr error message indicates exceeding Trash quota
+    and suggests using -skipTrash, when moving to trash fails.
+    (Boris Shkolnik via suresh)
+
+    HADOOP-5675. Do not launch a job if DistCp has no work to do. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HDFS-457. Better handling of volume failure in Data Node storage.
+    This fix is a port from hdfs-0.22 to common-0.20 by Boris Shkolnik.
+    Contributed by Erik Steffl.
+
+    HDFS-625. Fix NullPointerException thrown from ListPathServlet. 
+    Contributed by Suresh Srinivas.
+
+    HADOOP-6343. Log unexpected throwable object caught in RPC.  
+    Contributed by Jitendra Nath Pandey
+
+    MAPREDUCE-1186. Fixed DistributedCache to do a recursive chmod on just the
+    per-cache directory, not all of mapred.local.dir.
+    (Amareshwari Sriramadasu via acmurthy)
+
+    MAPREDUCE-1231. Add an option to distcp to ignore checksums when used with
+    the upgrade option.
+    (Jothi Padmanabhan via yhemanth)
+
+    MAPREDUCE-1219. Fixed JobTracker to not collect per-job metrics, thus
+    easing load on it. (Amareshwari Sriramadasu via acmurthy)
+
+    HDFS-761. Fix failure to process rename operation from edits log due to
+    quota verification. (suresh)
+
+    MAPREDUCE-1196. Fix FileOutputCommitter to use the deprecated cleanupJob
+    api correctly. (acmurthy)
+
+    HADOOP-6344.  rm and rmr immediately delete files rather than sending
+    to trash, despite trash being enabled, if a user is over-quota. (jhoman)
+
+    MAPREDUCE-1160. Reduce verbosity of log lines in some Map/Reduce classes
+    to avoid filling up jobtracker logs on a busy cluster.
+    (Ravi Gummadi and Hong Tang via yhemanth)
+
+    HDFS-587. Add ability to run HDFS with MR test on non-default queue,
+    also updated junit dependency from junit-3.8.1 to junit-4.5 (to make
+    it possible to use Configured and Tool to process command line to
+    be able to specify a queue). Contributed by Erik Steffl.
+
+    MAPREDUCE-1158. Fix JT running maps and running reduces metrics.
+    (sharad)
+
+    MAPREDUCE-947. Fix bug in earlier implementation that was
+    causing unit tests to fail.
+    (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-1062. Fix MRReliabilityTest to work with retired jobs
+    (Contributed by Sreekanth Ramakrishnan)
+
+    MAPREDUCE-1090. Modified log statement in TaskMemoryManagerThread to
+    include task attempt id. (yhemanth)
+
+    MAPREDUCE-1048. Add occupied/reserved slot usage summary on
+    jobtracker UI. (Amareshwari Sriramadasu via sharad)
+
+    MAPREDUCE-1103. Added more metrics to Jobtracker. (sharad)
+
+    MAPREDUCE-947. Added commitJob and abortJob apis to OutputCommitter.
+    Enhanced FileOutputCommitter to create a _SUCCESS file for successful
+    jobs. (Amar Kamat & Jothi Padmanabhan via acmurthy) 
+
+    MAPREDUCE-1105. Remove max limit configuration in capacity scheduler in
+    favor of max capacity percentage thus allowing the limit to go over
+    queue capacity. (Rahul Kumar Singh via yhemanth)
+
+    MAPREDUCE-1086. Setup Hadoop logging environment for tasks to point to
+    task related parameters. (Ravi Gummadi via yhemanth)
+
+    MAPREDUCE-739. Allow relative paths to be created inside archives. 
+    (mahadev)
+
+    HADOOP-6097. Multiple bugs w/ Hadoop archives (mahadev)
+
+    HADOOP-6231. Allow caching of filesystem instances to be disabled on a
+    per-instance basis (ben slusky via mahadev)
+
+    MAPREDUCE-826.  harchive doesn't use ToolRunner / harchive returns 0 even
+    if the job fails with exception (koji via mahadev)
+
+    HDFS-686. NullPointerException is thrown while merging edit log and
+    image. (hairong)
+
+    HDFS-709. Fix TestDFSShell failure due to rename bug introduced by 
+    HDFS-677. (suresh)
+
+    HDFS-677. Rename failure when both source and destination quota exceeds
+    results in deletion of source. (suresh)
+
+    HADOOP-6284. Add a new parameter, HADOOP_JAVA_PLATFORM_OPTS, to
+    hadoop-config.sh so that it allows setting java command options for
+    JAVA_PLATFORM.  (Koji Noguchi via szetszwo)
+
+    MAPREDUCE-732. Removed spurious log statements in the node
+    blacklisting logic. (Sreekanth Ramakrishnan via yhemanth)
+
+    MAPREDUCE-144. Includes dump of the process tree in task diagnostics when 
+    a task is killed due to exceeding memory limits.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    MAPREDUCE-979. Fixed JobConf APIs related to memory parameters to 
+    return values of new configuration variables when deprecated 
+    variables are disabled. (Sreekanth Ramakrishnan via yhemanth)
+
+    MAPREDUCE-277. Makes job history counters available on the job history
+    viewers. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5625. Add operation duration to clienttrace. (Lei Xu 
+    via cdouglas)
+
+    HADOOP-5222. Add offset to datanode clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-6218. Adds a feature where TFile can be split by Record
+    Sequence number. Contributed by Hong Tang and Raghu Angadi.
+
+    MAPREDUCE-1088. Changed permissions on JobHistory files on local disk to
+    0744. Contributed by Arun C. Murthy.
+
+    HADOOP-6304. Use java.io.File.set{Readable|Writable|Executable} where
+    possible in RawLocalFileSystem. Contributed by Arun C. Murthy.
+    
+    MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band
+    heartbeat on task-completion for better job-latency. Contributed by
+    Arun C. Murthy
+    Configuration changes:
+      add mapreduce.tasktracker.outofband.heartbeat
+
+    MAPREDUCE-1030. Fix capacity-scheduler to assign a map and a reduce task
+    per-heartbeat. Contributed by Rahul K Singh.
+
+    MAPREDUCE-1028. Fixed number of slots occupied by cleanup tasks to one
+    irrespective of slot size for the job. Contributed by Ravi Gummadi. 
+
+    MAPREDUCE-964. Fixed start and finish times of TaskStatus to be
+    consistent, thereby fixing inconsistencies in metering tasks.
+    Contributed by Sreekanth Ramakrishnan.
+
+    HADOOP-5976. Add a new command, classpath, to the hadoop 
+    script. Contributed by Owen O'Malley and Gary Murry
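+
+    For example, to print the classpath the hadoop script itself would use:
+
+      bin/hadoop classpath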
+
+    HADOOP-5784. Makes the number of heartbeats that should arrive 
+    a second at the JobTracker configurable. Contributed by 
+    Amareshwari Sriramadasu.
+
+    MAPREDUCE-945. Modifies MRBench and TestMapRed to use 
+    ToolRunner so that options such as queue name can be 
+    passed via command line. Contributed by Sreekanth Ramakrishnan.
+
+    HADOOP-5420. Correct a bug in the earlier implementation.
+    Contributed by Arun C. Murthy.
+
+    HADOOP-5363 Add support for proxying connections to multiple 
+    clusters with different versions to hdfsproxy. Contributed 
+    by Zhiyong Zhang
+
+    HADOOP-5780. Improve per block message printed by -metaSave 
+    in HDFS. (Raghu Angadi)
+
+    HADOOP-6227. Fix Configuration to allow final parameters to be set 
+    to null and prevent them from being overridden. Contributed by 
+    Amareshwari Sriramadasu.
+
+    MAPREDUCE-430. Added patch supplied by Amar Kamat to allow
+    roll forward on branch to include externally committed patch.
+
+    MAPREDUCE-768. Provide an option to dump jobtracker configuration in 
+    JSON format to standard output. Contributed by V.V.Chaitanya Krishna.
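+
+    An illustrative invocation (assuming the -dumpConfiguration option this
+    issue introduces):
+
+      bin/hadoop jobtracker -dumpConfiguration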
+
+    MAPREDUCE-834 Correct an issue created by merging this issue with
+    patch attached to external Jira.
+
+    HADOOP-6184 Provide an API to dump Configuration in a JSON format. 
+    Contributed by V.V.Chaitanya Krishna.
+
+    MAPREDUCE-745  Patch added for this issue to allow branch-0.20 to 
+    merge cleanly.
+
+    MAPREDUCE-478 Allow map and reduce jvm parameters, environment 
+    variables and ulimit to be set separately.
+
+    MAPREDUCE-682 Removes reservations on tasktrackers which are blacklisted. 
+    Contributed by Sreekanth Ramakrishnan.
+
+    HADOOP-5420 Support killing of process groups in LinuxTaskController 
+    binary
+
+    HADOOP-5488 Removes the pidfile management for the Task JVM from the 
+    framework and instead passes the PID back and forth between the 
+    TaskTracker and the Task processes. Contributed by Ravi Gummadi.
+
+    MAPREDUCE-467 Provide ability to collect statistics about total tasks and 
+    succeeded tasks in different time windows.
+
+    MAPREDUCE-817. Add a cache for retired jobs with minimal job
+    info and provide a way to access the history file URL.
+
+    MAPREDUCE-814. Provide a way to configure completed job history
+    files to be on HDFS.
+
+    MAPREDUCE-838 Fixes a problem in the way commit of task outputs
+    happens. The bug was that even if commit failed, the task would be
+    declared as successful. Contributed by Amareshwari Sriramadasu.
+
+    MAPREDUCE-809 Fix job-summary logs to correctly record final status of 
+    FAILED and KILLED jobs.  
+    http://issues.apache.org/jira/secure/attachment/12414726/MAPREDUCE-809_0_20090728_yhadoop20.patch 
+
+    MAPREDUCE-740 Log a job-summary at the end of a job, while
+    allowing it to be configured to use a custom appender if desired.
+    http://issues.apache.org/jira/secure/attachment/12413941/MAPREDUCE-740_2_20090717_yhadoop20.patch
+
+    MAPREDUCE-771 Fixes a bug which delays normal jobs in favor of
+    high-ram jobs.
+    http://issues.apache.org/jira/secure/attachment/12413990/MAPREDUCE-771-20.patch
+
+    HADOOP-5420 Support setsid based kill in LinuxTaskController.
+    http://issues.apache.org/jira/secure/attachment/12414735/5420-ydist.patch.txt
+
+    MAPREDUCE-733 Fixes a bug where, when a task tracker is killed,
+    it throws an exception. Instead it should catch and process it and
+    allow the rest of the flow to go through.
+    http://issues.apache.org/jira/secure/attachment/12413015/MAPREDUCE-733-ydist.patch
+
+    MAPREDUCE-734 Fixes a bug which prevented high-ram jobs from being
+    removed from the scheduler queue.
+    http://issues.apache.org/jira/secure/attachment/12413035/MAPREDUCE-734-20.patch
+
+    MAPREDUCE-693  Fixes a bug that when a job is submitted and the
+    JT is restarted (before job files have been written) and the job
+    is killed after recovery, the conf files fail to be moved to the
+    "done" subdirectory.
+    http://issues.apache.org/jira/secure/attachment/12412823/MAPREDUCE-693-v1.2-branch-0.20.patch
+
+    MAPREDUCE-722 Fixes a bug where more slots are getting reserved
+    for HiRAM job tasks than required.
+    http://issues.apache.org/jira/secure/attachment/12412744/MAPREDUCE-722.1.txt
+
+    MAPREDUCE-683 TestJobTrackerRestart failed because of stale
+    filemanager cache (which was created once per jvm). This patch makes
+    sure that the filemanager is inited upon every JobHistory.init()
+    and hence upon every restart. Note that this won't happen in production
+    as upon a restart the new jobtracker will start in a new jvm and
+    hence a new cache will be created.
+    http://issues.apache.org/jira/secure/attachment/12412743/MAPREDUCE-683-v1.2.1-branch-0.20.patch
+
+    MAPREDUCE-709 Fixes a bug where node health check script does
+    not display the correct message on timeout.
+    http://issues.apache.org/jira/secure/attachment/12412711/mapred-709-ydist.patch
+
+    MAPREDUCE-708 Fixes a bug where node health check script does
+    not refresh the "reason for blacklisting".
+    http://issues.apache.org/jira/secure/attachment/12412706/MAPREDUCE-708-ydist.patch
+
+    MAPREDUCE-522 Rewrote TestQueueCapacities to make it simpler
+    and avoid timeout errors.
+    http://issues.apache.org/jira/secure/attachment/12412472/mapred-522-ydist.patch
+
+    MAPREDUCE-532 Provided ability in the capacity scheduler to
+    limit the number of slots that can be concurrently used per queue
+    at any given time.
+    http://issues.apache.org/jira/secure/attachment/12412592/MAPREDUCE-532-20.patch
+
+    MAPREDUCE-211 Provides ability to run a health check script on
+    the tasktracker nodes and blacklist nodes if they are unhealthy.
+    Contributed by Sreekanth Ramakrishnan.
+    http://issues.apache.org/jira/secure/attachment/12412161/mapred-211-internal.patch
+
+    MAPREDUCE-516 Remove .orig file included by mistake.
+    http://issues.apache.org/jira/secure/attachment/12412108/HADOOP-5964_2_20090629_yhadoop.patch
+
+    MAPREDUCE-416 Moves the history file to a "done" folder whenever
+    a job completes.
+    http://issues.apache.org/jira/secure/attachment/12411938/MAPREDUCE-416-v1.6-branch-0.20.patch
+
+    HADOOP-5980 Previously, tasks spawned off by LinuxTaskController
+    didn't get LD_LIBRARY_PATH in their environment. The tasks will now
+    get the same LD_LIBRARY_PATH value as when spawned off by
+    DefaultTaskController.
+    http://issues.apache.org/jira/secure/attachment/12410825/hadoop-5980-v20.patch
+
+    HADOOP-5981 This issue completes the feature mentioned in
+    HADOOP-2838. HADOOP-2838 provided a way to set env variables in the
+    child process. This issue provides a way to inherit the tasktracker's
+    env variables and append to or reset them. So now X=$X:y will inherit
+    X (if present) and append y to it.
+    http://issues.apache.org/jira/secure/attachment/12410454/hadoop5981-branch-20-example.patch
+
+    HADOOP-5419  This issue is to provide an improvement on the
+    existing M/R framework to let users know which queues they have
+    access to, and for what operations. One use case for this would be
+    that currently there is no easy way to know if the user has access
+    to submit jobs to a queue, until it fails with an access control
+    exception.
+    http://issues.apache.org/jira/secure/attachment/12410824/hadoop-5419-v20.2.patch
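+
+    A minimal sketch of how a user might query this from the shell
+    (assuming the -showacls option of the queue command):
+
+      bin/hadoop queue -showacls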
+
+    HADOOP-5643 Added the functionality to refresh the jobtracker's node
+    list via command line (bin/hadoop mradmin -refreshNodes). The command
+    should be run as the jobtracker owner (jobtracker process owner)
+    or from a super group (mapred.permissions.supergroup).
+    http://issues.apache.org/jira/secure/attachment/12410619/Fixed%2B5643-0.20-final
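+
+    A minimal sketch of the intended workflow (the host name, exclude-file
+    path and the mapred.hosts.exclude setting are illustrative assumptions):
+
+      # add the tasktracker host to the file named by mapred.hosts.exclude
+      echo badnode.example.com >> /etc/hadoop/mapred.exclude
+      # ask the running JobTracker to re-read its node lists
+      bin/hadoop mradmin -refreshNodes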
+
+    HADOOP-2838 Now users can set environment variables using
+    mapred.child.env. They can do the following: X=Y sets X to Y;
+    X=$X:Y appends Y to X (where X is taken from the tasktracker's
+    environment).
+    http://issues.apache.org/jira/secure/attachment/12409895/HADOOP-2838-v2.2-branch-20-example.patch
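+
+    A hedged example of the syntax (the variable names, paths and job jar
+    are hypothetical, and the driver is assumed to use ToolRunner so that
+    -D generic options apply):
+
+      bin/hadoop jar my-job.jar MyDriver \
+        -Dmapred.child.env='FOO=bar,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/native/lib'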
+    
+    HADOOP-5818. Revert the renaming from FSNamesystem.checkSuperuserPrivilege
+    to checkAccess by HADOOP-5643.  (Amar Kamat via szetszwo)
+    https://issues.apache.org/jira/secure/attachment/12409835/5818for0.20.patch
+
+    HADOOP-5801. Fixes the problem: If the hosts file is changed across restart
+    then it should be refreshed upon recovery so that the excluded hosts are
+    lost and the maps are re-executed. (Amar Kamat via ddas)
+    https://issues.apache.org/jira/secure/attachment/12409834/5801-0.20.patch
+
+    HADOOP-5643. Adds a way to decommission TaskTrackers 
+    while the JobTracker is running. (Amar Kamat via ddas)
+    https://issues.apache.org/jira/secure/attachment/12409833/Fixed+5643-0.20
+
+    HADOOP-5419. Provide a facility to query the Queue ACLs for the
+    current user.  (Rahul Kumar Singh via yhemanth)
+    http://issues.apache.org/jira/secure/attachment/12409323/hadoop-5419-v20.patch
+
+    HADOOP-5733. Add map/reduce slot capacity and blacklisted capacity to
+    JobTracker metrics. (Sreekanth Ramakrishnan via cdouglas)
+    http://issues.apache.org/jira/secure/attachment/12409322/hadoop-5733-v20.patch
+
+    HADOOP-5738. Split "waiting_tasks" JobTracker metric into waiting maps and
+    waiting reduces. (Sreekanth Ramakrishnan via cdouglas) 
+    https://issues.apache.org/jira/secure/attachment/12409321/5738-y20.patch
+
+    HADOOP-4842. Streaming now allows specifying a command for the combiner.
+    (Amareshwari Sriramadasu via ddas)
+    http://issues.apache.org/jira/secure/attachment/12402355/patch-4842-3.txt
+
+    HADOOP-4490. Provide ability to run tasks as job owners.
+    (Sreekanth Ramakrishnan via yhemanth)
+    http://issues.apache.org/jira/secure/attachment/12409318/hadoop-4490-br20-3.patch
+    https://issues.apache.org/jira/secure/attachment/12410170/hadoop-4490-br20-3.2.patch
+
+    HADOOP-5442. Paginate jobhistory display and added some search
+    capabilities. (Amar Kamat via acmurthy)
+    http://issues.apache.org/jira/secure/attachment/12402301/HADOOP-5442-v1.12.patch
+
+    HADOOP-3327. Improves handling of READ_TIMEOUT during map output copying.
+    (Amareshwari Sriramadasu via ddas)
+    http://issues.apache.org/jira/secure/attachment/12399449/patch-3327-2.txt
+
+    HADOOP-5113. Fixed logcondense to remove files for usernames
+    beginning with characters specified in the -l option.
+    (Peeyush Bishnoi via yhemanth)
+    http://issues.apache.org/jira/secure/attachment/12409317/hadoop-5113-0.18.txt
+
+    HADOOP-2898. Provide an option to specify a port range for
+    Hadoop services provisioned by HOD.
+    (Peeyush Bishnoi via yhemanth)
+    http://issues.apache.org/jira/secure/attachment/12409316/hadoop-2898-0.20.txt
+
+    HADOOP-4930. Implement a Linux native executable that can be used to
+    launch tasks as users. (Sreekanth Ramakrishnan via yhemanth)
+    http://issues.apache.org/jira/secure/attachment/12409402/hadoop-4930v20.patch
+
+Release 0.20.3 - Unreleased
+
+  IMPROVEMENTS
+
+  BUG FIXES
+
+    HDFS-955. New implementation of saveNamespace() to avoid loss of edits 
+    when name-node fails during saving. (shv)
+
+Release 0.20.2 - Unreleased
+
+  BUG FIXES
+
+    MAPREDUCE-112. Add counters for reduce input, output records to the new API.
+    (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and hairong
+    via hairong)
+
+    HDFS-927. DFSInputStream retries too many times for new block locations
+    (Todd Lipcon via Stack)
+
+    HDFS-793. DataNode should first receive the whole packet ack message
+    before it constructs and sends its own ack message for the packet.
+    (hairong)
+
+  IMPROVEMENTS
+
+    HDFS-187. Initialize secondary namenode http address in TestStartup.
+    (Todd Lipcon via szetszwo)
+
+    HDFS-185. Disallow chown, chgrp, chmod, setQuota, and setSpaceQuota when
+    name-node is in safemode. (Ravi Phulari via shv)
+
 Release 0.20.1 - 2009-09-01
 
   INCOMPATIBLE CHANGES

+ 67 - 10
bin/hadoop

@@ -24,6 +24,12 @@
 #
 #   HADOOP_CLASSPATH Extra Java CLASSPATH entries.
 #
+#   HADOOP_USER_CLASSPATH_FIRST      When defined, the HADOOP_CLASSPATH is 
+#                                    added in the beginning of the global
+#                                    added at the beginning of the global
+#                                    by doing 
+#                                    export HADOOP_USER_CLASSPATH_FIRST=true
+#
 #   HADOOP_HEAPSIZE  The maximum amount of heap to use, in MB. 
 #                    Default is 1000.
 #
@@ -64,15 +70,19 @@ if [ $# = 0 ]; then
   echo "  fsck                 run a DFS filesystem checking utility"
   echo "  fs                   run a generic filesystem user client"
   echo "  balancer             run a cluster balancing utility"
+  echo "  fetchdt              fetch a delegation token from the NameNode"
   echo "  jobtracker           run the MapReduce job Tracker node" 
   echo "  pipes                run a Pipes job"
   echo "  tasktracker          run a MapReduce task Tracker node" 
+  echo "  historyserver        run job history servers as a standalone daemon"
   echo "  job                  manipulate MapReduce jobs"
   echo "  queue                get information regarding JobQueues" 
   echo "  version              print the version"
   echo "  jar <jar>            run a jar file"
   echo "  distcp <srcurl> <desturl> copy file or directories recursively"
-  echo "  archive -archiveName NAME <src>* <dest> create a hadoop archive"
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
+  echo "  classpath            prints the class path needed to get the"
+  echo "                       Hadoop jar and the required libraries"
   echo "  daemonlog            get/set the log level for each daemon"
   echo " or"
   echo "  CLASSNAME            run the class named CLASSNAME"
@@ -88,6 +98,14 @@ if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
   . "${HADOOP_CONF_DIR}/hadoop-env.sh"
 fi
 
+# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
+if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
+  HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
+  HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER
+  starting_secure_dn="true"
+fi
+
 # some Java parameters
 if [ "$JAVA_HOME" != "" ]; then
   #echo "run java in $JAVA_HOME"
@@ -111,6 +129,9 @@ fi
 
 # CLASSPATH initially contains $HADOOP_CONF_DIR
 CLASSPATH="${HADOOP_CONF_DIR}"
+if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ] && [ "$HADOOP_CLASSPATH" != "" ] ; then
+  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
+fi
 CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
 
 # for developers, add Hadoop classes to CLASSPATH
@@ -134,7 +155,7 @@ IFS=
 if [ -d "$HADOOP_HOME/webapps" ]; then
   CLASSPATH=${CLASSPATH}:$HADOOP_HOME
 fi
-for f in $HADOOP_HOME/hadoop-*-core.jar; do
+for f in $HADOOP_HOME/hadoop-core-*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
@@ -153,15 +174,15 @@ for f in $HADOOP_HOME/lib/jsp-2.1/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
-for f in $HADOOP_HOME/hadoop-*-tools.jar; do
+for f in $HADOOP_HOME/hadoop-tools-*.jar; do
   TOOL_PATH=${TOOL_PATH}:$f;
 done
-for f in $HADOOP_HOME/build/hadoop-*-tools.jar; do
+for f in $HADOOP_HOME/build/hadoop-tools-*.jar; do
   TOOL_PATH=${TOOL_PATH}:$f;
 done
 
 # add user-specified CLASSPATH last
-if [ "$HADOOP_CLASSPATH" != "" ]; then
+if [ "$HADOOP_USER_CLASSPATH_FIRST" = "" ] && [ "$HADOOP_CLASSPATH" != "" ]; then
   CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
 fi
 
@@ -182,7 +203,13 @@ fi
 unset IFS
 
 # figure out which class to run
-if [ "$COMMAND" = "namenode" ] ; then
+if [ "$COMMAND" = "classpath" ] ; then
+  if $cygwin; then
+    CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+  fi
+  echo $CLASSPATH
+  exit
+elif [ "$COMMAND" = "namenode" ] ; then
   CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode'
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_NAMENODE_OPTS"
 elif [ "$COMMAND" = "secondarynamenode" ] ; then
@@ -190,7 +217,11 @@ elif [ "$COMMAND" = "secondarynamenode" ] ; then
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_SECONDARYNAMENODE_OPTS"
 elif [ "$COMMAND" = "datanode" ] ; then
   CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_DATANODE_OPTS"
+  if [[ $EUID -eq 0 ]]; then
+    HADOOP_OPTS="$HADOOP_OPTS -jvm server $HADOOP_DATANODE_OPTS"
+  else
+    HADOOP_OPTS="$HADOOP_OPTS -server $HADOOP_DATANODE_OPTS"
+  fi
 elif [ "$COMMAND" = "fs" ] ; then
   CLASS=org.apache.hadoop.fs.FsShell
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -209,16 +240,23 @@ elif [ "$COMMAND" = "fsck" ] ; then
 elif [ "$COMMAND" = "balancer" ] ; then
   CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
+elif [ "$COMMAND" = "fetchdt" ] ; then
+  CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
 elif [ "$COMMAND" = "jobtracker" ] ; then
   CLASS=org.apache.hadoop.mapred.JobTracker
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOBTRACKER_OPTS"
+elif [ "$COMMAND" = "historyserver" ] ; then
+  CLASS=org.apache.hadoop.mapred.JobHistoryServer
+  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOB_HISTORYSERVER_OPTS"
 elif [ "$COMMAND" = "tasktracker" ] ; then
   CLASS=org.apache.hadoop.mapred.TaskTracker
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_TASKTRACKER_OPTS"
 elif [ "$COMMAND" = "job" ] ; then
   CLASS=org.apache.hadoop.mapred.JobClient
+  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 elif [ "$COMMAND" = "queue" ] ; then
   CLASS=org.apache.hadoop.mapred.JobQueueClient
+  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 elif [ "$COMMAND" = "pipes" ] ; then
   CLASS=org.apache.hadoop.mapred.pipes.Submitter
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -227,6 +265,7 @@ elif [ "$COMMAND" = "version" ] ; then
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 elif [ "$COMMAND" = "jar" ] ; then
   CLASS=org.apache.hadoop.util.RunJar
+  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
 elif [ "$COMMAND" = "distcp" ] ; then
   CLASS=org.apache.hadoop.tools.DistCp
   CLASSPATH=${CLASSPATH}:${TOOL_PATH}
@@ -255,7 +294,7 @@ fi
 # setup 'java.library.path' for native-hadoop code if necessary
 JAVA_LIBRARY_PATH=''
 if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" ]; then
-  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
+  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
   
   if [ -d "$HADOOP_HOME/build/native" ]; then
     JAVA_LIBRARY_PATH=${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
@@ -285,5 +324,23 @@ if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
 fi  
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
 
-# run it
-exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+# Check to see if we should start a secure datanode
+if [ "$starting_secure_dn" = "true" ]; then
+  if [ "$HADOOP_PID_DIR" = "" ]; then
+    HADOOP_SECURE_DN_PID="/tmp/hadoop_secure_dn.pid"
+  else
+   HADOOP_SECURE_DN_PID="$HADOOP_PID_DIR/hadoop_secure_dn.pid"
+  fi
+
+  exec "$HADOOP_HOME/bin/jsvc" -Dproc_$COMMAND -outfile "$HADOOP_LOG_DIR/jsvc.out" \
+                                               -errfile "$HADOOP_LOG_DIR/jsvc.err" \
+                                               -pidfile "$HADOOP_SECURE_DN_PID" \
+                                               -nodetach \
+                                               -user "$HADOOP_SECURE_DN_USER" \
+                                               -cp "$CLASSPATH" \
+                                               $JAVA_HEAP_MAX $HADOOP_OPTS \
+                                               org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter "$@"
+else
+  # run it
+  exec "$JAVA" -Dproc_$COMMAND $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
+fi

+ 12 - 4
bin/hadoop-daemon.sh

@@ -68,20 +68,28 @@ if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
   . "${HADOOP_CONF_DIR}/hadoop-env.sh"
 fi
 
+# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
+if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
+  export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
+  export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER   
+fi
+
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+  export HADOOP_IDENT_STRING="$USER"
+fi
+
 # get log directory
 if [ "$HADOOP_LOG_DIR" = "" ]; then
   export HADOOP_LOG_DIR="$HADOOP_HOME/logs"
 fi
 mkdir -p "$HADOOP_LOG_DIR"
+chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR 
 
 if [ "$HADOOP_PID_DIR" = "" ]; then
   HADOOP_PID_DIR=/tmp
 fi
 
-if [ "$HADOOP_IDENT_STRING" = "" ]; then
-  export HADOOP_IDENT_STRING="$USER"
-fi
-
 # some variables
 export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
 export HADOOP_ROOT_LOGGER="INFO,DRFA"

+ 1 - 1
bin/rcc

@@ -72,7 +72,7 @@ IFS=
 if [ -d "$HADOOP_HOME/webapps" ]; then
   CLASSPATH=${CLASSPATH}:$HADOOP_HOME
 fi
-for f in $HADOOP_HOME/hadoop-*-core.jar; do
+for f in $HADOOP_HOME/hadoop-core-*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 

+ 27 - 0
bin/start-jobhistoryserver.sh

@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start the Hadoop job history daemon.  Run this on the node where the history server needs to run
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+# start daemon
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR start historyserver

+ 27 - 0
bin/stop-jobhistoryserver.sh

@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop the Hadoop job history daemon.  Run this on the node where the history server is running
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hadoop-config.sh
+
+"$bin"/hadoop-daemon.sh --config $HADOOP_CONF_DIR stop historyserver
+
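
A short usage sketch for the two new scripts above (run them on whichever
node is chosen to host the history server):

    bin/start-jobhistoryserver.sh    # starts the historyserver daemon
    bin/stop-jobhistoryserver.sh     # stops it again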

+ 684 - 158
build.xml

@@ -18,6 +18,7 @@
 -->
 
 <project name="Hadoop" default="compile" 
+   xmlns:artifact="urn:maven-artifact-ant"
    xmlns:ivy="antlib:org.apache.ivy.ant"> 
 
   <!-- Load all the default properties, and any the user wants    -->
@@ -27,9 +28,17 @@
  
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.20.2-dev"/>
+  <property name="version" value="0.20.202.0-SNAPSHOT"/>
   <property name="final.name" value="${name}-${version}"/>
+  <property name="test.final.name" value="${name}-test-${version}"/>
   <property name="year" value="2009"/>
+  
+  <property name="core.final.name" value="${name}-core-${version}"/>
+  <property name="test.final.name" value="${name}-test-${version}"/>
+  <property name="examples.final.name" value="${name}-examples-${version}"/>
+  <property name="tools.final.name" value="${name}-tools-${version}"/>
+  <property name="ant.final.name" value="${name}-ant-${version}"/>
+  <property name="streaming.final.name" value="${name}-streaming-${version}"/>
 
   <property name="src.dir" value="${basedir}/src"/>  	
   <property name="core.src.dir" value="${src.dir}/core"/>
@@ -109,6 +118,11 @@
   <property name="test.junit.printsummary" value="yes" />
   <property name="test.junit.haltonfailure" value="no" />
   <property name="test.junit.maxmemory" value="512m" />
+  <property name="test.tools.input.dir" value="${basedir}/src/test/tools/data"/>
+  
+  <property name="test.commit.tests.file" value="${test.src.dir}/commit-tests" />
+  <property name="test.smoke.tests.file" value="${test.src.dir}/smoke-tests" />
+  <property name="test.all.tests.file" value="${test.src.dir}/all-tests" />
 
   <property name="test.libhdfs.conf.dir" value="${c++.libhdfs.src}/tests/conf"/>
   <property name="test.libhdfs.dir" value="${test.build.dir}/libhdfs"/>
@@ -120,6 +134,7 @@
   <property name="javadoc.link.java"
 	    value="http://java.sun.com/javase/6/docs/api/"/>
   <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+  <property name="javadoc.maxmemory" value="512m" />
 
   <property name="dist.dir" value="${build.dir}/${final.name}"/>
 
@@ -137,7 +152,7 @@
 
   <property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
   <property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
-  <property name="jdiff.stable" value="0.19.2"/>
+  <property name="jdiff.stable" value="0.20.9"/>
   <property name="jdiff.stable.javadoc" 
             value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
 
@@ -147,32 +162,89 @@
   <property name="patch.cmd" value="patch"/>
   <property name="make.cmd" value="make"/>
 
+  <property name="jsvc.build.dir" value="${build.dir}/jsvc" />
+  <property name="jsvc.install.dir" value="${dist.dir}/bin" /> 
+  <property name="jsvc.location" value="http://archive.apache.org/dist/commons/daemon/binaries/1.0.2/linux/commons-daemon-1.0.2-bin-linux-i386.tar.gz" />
+  <property name="jsvc.dest.name" value="jsvc.tar.gz" />
+
+  <!-- task-controller properties set here -->
+  <!-- Source directory from where configure is run and files are copied
+  -->
+	
+  <property name="c++.task-controller.src" 
+    value="${basedir}/src/c++/task-controller" />
+  <!-- directory where autoconf files + temporary files and src is 
+    stored for compilation -->
+  <property name="build.c++.task-controller" 
+    value="${build.c++}/task-controller" />
+  <property name="task-controller.prefix.dir" value="${dist.dir}" />
+  <!-- the configuration directory for the linux task controller -->
+  <property name="hadoop.conf.dir" value="/etc/hadoop"/>
+
+  <!-- end of task-controller properties -->
+
   <!-- IVY properteis set here -->
   <property name="ivy.dir" location="ivy" />
   <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+  <property name="asfrepo" value="https://repository.apache.org"/> 
   <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
-  <property name="ivy_repo_url" value="http://repo2.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
-  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" />
+  <property name="ivy_repo_url" 
+    value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+  <property name="ant_task.jar" 
+    location="${ivy.dir}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="tsk.org" value="/org/apache/maven/maven-ant-tasks/"/>
+  <property name="ant_task_repo_url"
+    value="${mvnrepo}${tsk.org}${ant-task.version}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="repo" value="snapshots"/>
+  <property name="asfsnapshotrepo" 
+    value="${asfrepo}/content/repositories/snapshots"/> 
+  <property name="asfstagingrepo"
+    value="${asfrepo}/service/local/staging/deploy/maven2"/> 
+  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"/>
   <property name="ivy.org" value="org.apache.hadoop"/>
   <property name="build.dir" location="build" />
   <property name="dist.dir" value="${build.dir}/${final.name}"/>
   <property name="build.ivy.dir" location="${build.dir}/ivy" />
-  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
-  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
-  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report" />
-  <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven" />
-  <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.pom" />
-  <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/hadoop-core-${hadoop.version}.jar" />
-
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib"/>
+  <property name="common.ivy.lib.dir" 
+    location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report"/>
+
+  <property name="hadoop-core.pom" location="${ivy.dir}/hadoop-core-pom.xml"/>
+  <property name="hadoop-core-pom-template.xml" 
+    location="${ivy.dir}/hadoop-core-pom-template.xml"/>
+  <property name="hadoop-core.jar" location="${build.dir}/${core.final.name}.jar"/>
+  <property name="hadoop-test.pom" location="${ivy.dir}/hadoop-test-pom.xml"/>
+  <property name="hadoop-test-pom-template.xml" 
+    location="${ivy.dir}/hadoop-test-pom-template.xml" />
+  <property name="hadoop-test.jar" location="${build.dir}/${test.final.name}.jar"/>
+  <property name="hadoop-tools.pom" location="${ivy.dir}/hadoop-tools-pom.xml"/>
+  <property name="hadoop-tools-pom-template.xml" 
+    location="${ivy.dir}/hadoop-tools-pom-template.xml" />
+  <property name="hadoop-tools.jar" location="${build.dir}/${tools.final.name}.jar"/>
+  <property name="hadoop-examples.pom" location="${ivy.dir}/hadoop-examples-pom.xml"/>
+  <property name="hadoop-examples-pom-template.xml" 
+    location="${ivy.dir}/hadoop-examples-pom-template.xml"/>
+  <property name="hadoop-examples.jar" 
+    location="${build.dir}/${examples.final.name}.jar"/>
+  <property name="hadoop-streaming.pom" 
+    location="${ivy.dir}/hadoop-streaming-pom.xml"/>
+  <property name="hadoop-streaming-pom-template.xml" 
+    location="${ivy.dir}/hadoop-streaming-pom-template.xml"/>
+  <property name="hadoop-streaming.jar" 
+    location="${build.dir}/contrib/streaming/${streaming.final.name}.jar"/>
+   
   <!--this is the naming policy for artifacts we want pulled down-->
-  <property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+  <property name="ivy.artifact.retrieve.pattern" 
+    value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
 
   <!--this is how artifacts that get built are named-->
   <property name="ivy.publish.pattern" value="hadoop-[revision]-core.[ext]"/>
-  <property name="hadoop.jar" location="${build.dir}/hadoop-${hadoop.version}-core.jar" />
 
   <!-- jdiff.home property set -->
-  <property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
+  <property name="jdiff.home" 
+    value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
   <property name="jdiff.jar" value="${jdiff.home}/jdiff-${jdiff.version}.jar"/>
   <property name="xerces.jar" value="${jdiff.home}/xerces-${xerces.version}.jar"/>
 
@@ -187,6 +259,10 @@
     </and>
   </condition>
 
+  <condition property="staging">
+     <equals arg1="${repo}" arg2="staging"/>
+  </condition>
+
   <!-- the normal classpath -->
   <path id="classpath">
     <pathelement location="${build.classes}"/>
@@ -208,8 +284,8 @@
     <pathelement location="${build.tools}"/>
     <pathelement path="${clover.jar}"/>
     <fileset dir="${test.lib.dir}">
-      <include name="**/*.jar" />
-      <exclude name="**/excluded/" />
+      <include name="**/*.jar"/>
+      <exclude name="**/excluded/"/>
     </fileset>
     <path refid="classpath"/>
   </path>
@@ -222,9 +298,6 @@
     <pathelement location="${build.dir}"/>
   </path>
 
-  <!-- properties dependent on the items defined above. -->
-  <!--<available classname="${rat.reporting.classname}" classpathref="classpath" property="rat.present" value="true"/> -->
-
   <!-- ====================================================== -->
   <!-- Macro definitions                                      -->
   <!-- ====================================================== -->
@@ -249,6 +322,7 @@
     <mkdir dir="${build.src}"/>
     <mkdir dir="${build.webapps}/task/WEB-INF"/>
     <mkdir dir="${build.webapps}/job/WEB-INF"/>
+    <mkdir dir="${build.webapps}/history/WEB-INF"/>
     <mkdir dir="${build.webapps}/hdfs/WEB-INF"/>
     <mkdir dir="${build.webapps}/datanode/WEB-INF"/>
     <mkdir dir="${build.webapps}/secondary/WEB-INF"/>
@@ -285,7 +359,7 @@
     </copy>
 
     <exec executable="sh">
-       <arg line="src/saveVersion.sh ${version}"/>
+       <arg line="src/saveVersion.sh ${version} ${build.dir}"/>
     </exec>
 	
    <exec executable="sh">
@@ -293,6 +367,8 @@
    </exec>
   </target>
 
+  <import file="${test.src.dir}/aop/build/aop.xml"/>
+
   <!-- ====================================================== -->
   <!-- Compile the Java files                                 -->
   <!-- ====================================================== -->
@@ -349,7 +425,7 @@
      
   </target>
 
-  <target name="compile-mapred-classes" depends="compile-core-classes">
+  <target name="compile-mapred-classes" depends="compile-core-classes,compile-hdfs-classes">
     <jsp-compile
      uriroot="${src.webapps}/task"
      outputdir="${build.src}"
@@ -357,8 +433,21 @@
      webxml="${build.webapps}/task/WEB-INF/web.xml">
     </jsp-compile>
 
+    <!-- Compile Java files (excluding JSPs) checking warnings -->
+    <jsp-compile
+     uriroot="${src.webapps}/history"
+     outputdir="${build.src}"
+     package="org.apache.hadoop.mapred"
+     webxml="${build.webapps}/history/WEB-INF/web.xml">
+    </jsp-compile>
+    
+    <copy todir="${build.webapps}/job">
+      <fileset dir="${src.webapps}/job" includes="**/*.jsp"/>
+      <fileset dir="${src.webapps}/history" includes="**/*.jsp"/>
+    </copy>
+     
     <jsp-compile
-     uriroot="${src.webapps}/job"
+     uriroot="${build.webapps}/job"
      outputdir="${build.src}"
      package="org.apache.hadoop.mapred"
      webxml="${build.webapps}/job/WEB-INF/web.xml">
@@ -455,6 +544,8 @@
   	
     <mkdir dir="${build.native}/lib"/>
     <mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/nativeio"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/security"/>
 
   	<javah
   	  classpath="${build.classes}"
@@ -466,6 +557,32 @@
       <class name="org.apache.hadoop.io.compress.zlib.ZlibDecompressor" />
   	</javah>
 
+        <javah
+          classpath="${build.classes}"
+          destdir="${build.native}/src/org/apache/hadoop/io/nativeio"
+       force="yes"
+          verbose="yes"
+          >
+          <class name="org.apache.hadoop.io.nativeio.NativeIO" />
+        </javah>
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsMapping" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping" />
+  	</javah>
+
 	<exec dir="${build.native}" executable="sh" failonerror="true">
 	  <env key="OS_NAME" value="${os.name}"/>
 	  <env key="OS_ARCH" value="${os.arch}"/>
@@ -482,7 +599,7 @@
     </exec>
 
 	<exec dir="${build.native}" executable="sh" failonerror="true">
-	  <arg line="${build.native}/libtool --mode=install cp ${build.native}/lib/libhadoop.la ${build.native}/lib"/>
+	  <arg line="${build.native}/libtool --mode=install cp ${build.native}/libhadoop.la ${build.native}/lib"/>
     </exec>
 
   </target>
@@ -493,7 +610,7 @@
   	description="Compile core only">
   </target>
 
-  <target name="compile-contrib" depends="compile-core,compile-c++-libhdfs">
+  <target name="compile-contrib" depends="compile-core,tools-jar,compile-c++-libhdfs">
      <subant target="compile">
         <property name="version" value="${version}"/>
         <fileset file="${contrib.dir}/build.xml"/>
@@ -532,7 +649,9 @@
     <tar compression="gzip" destfile="${build.classes}/bin.tgz">
       <tarfileset dir="bin" mode="755"/>
     </tar>
-    <jar jarfile="${build.dir}/${final.name}-core.jar"
+    <property name="jar.properties.list"
+      value="commons-logging.properties, log4j.properties, hadoop-metrics.properties"/>
+    <jar jarfile="${build.dir}/${core.final.name}.jar"
          basedir="${build.classes}">
       <manifest>
         <section name="org/apache/hadoop">
@@ -541,9 +660,8 @@
           <attribute name="Implementation-Vendor" value="Apache"/>
         </section>
       </manifest>
-      <fileset file="${conf.dir}/commons-logging.properties"/>
-      <fileset file="${conf.dir}/log4j.properties"/>
-      <fileset file="${conf.dir}/hadoop-metrics.properties"/>
+      <fileset dir="${conf.dir}" includes="${jar.properties.list}" />
+      <fileset file="${jar.extra.properties.list}" />
       <zipfileset dir="${build.webapps}" prefix="webapps"/>
     </jar>
   </target>
@@ -554,18 +672,29 @@
   <!--                                                                    -->
   <!-- ================================================================== -->
   <target name="examples" depends="jar, compile-examples" description="Make the Hadoop examples jar.">
-    <jar jarfile="${build.dir}/${final.name}-examples.jar"
-         basedir="${build.examples}">
-      <manifest>
-        <attribute name="Main-Class" 
-                   value="org/apache/hadoop/examples/ExampleDriver"/>
-      </manifest>
-    </jar>
+    <macro-jar-examples
+      build.dir="${build.dir}"
+      basedir="${build.examples}">
+    </macro-jar-examples>
   </target>
 
+  <macrodef name="macro-jar-examples">
+    <attribute name="build.dir" />
+    <attribute name="basedir" />
+    <sequential>
+      <jar jarfile="@{build.dir}/${examples.final.name}.jar"
+           basedir="@{basedir}">
+        <manifest>
+          <attribute name="Main-Class"
+                    value="org/apache/hadoop/examples/ExampleDriver"/>
+        </manifest>
+      </jar>
+    </sequential>
+  </macrodef>
+
   <target name="tools-jar" depends="jar, compile-tools" 
           description="Make the Hadoop tools jar.">
-    <jar jarfile="${build.dir}/${final.name}-tools.jar"
+    <jar jarfile="${build.dir}/${tools.final.name}.jar"
          basedir="${build.tools}">
       <manifest>
         <attribute name="Main-Class" 
@@ -575,15 +704,15 @@
   </target>
 
   <!-- ================================================================== -->
-  <!-- Make the Hadoop metrics jar. (for use outside Hadoop)              -->
+  <!-- Make the Hadoop metrics plugin dev/sdk jar. (for use outside Hadoop)              -->
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="metrics.jar" depends="compile-core" description="Make the Hadoop metrics jar. (for use outside Hadoop)">
-    <jar jarfile="${build.dir}/hadoop-metrics-${version}.jar"
+  <target name="metrics.jar" depends="compile-core" description="Make the Hadoop metrics plugin dev/sdk jar. (for use outside Hadoop)">
+    <jar jarfile="${build.dir}/hadoop-metrics-dev-${version}.jar"
          basedir="${build.classes}">
-      <include name="**/metrics/**" />
-      <exclude name="**/package.html" />
+      <include name="**/metrics2/*.class" />
+      <include name="**/metrics2/util/*.class" />
     </jar>
   </target>
 
@@ -685,7 +814,7 @@
   <!--                                                                    -->
   <!-- ================================================================== -->
   <target name="jar-test" depends="compile-core-test" description="Make hadoop-test.jar">
-    <jar jarfile="${build.dir}/${final.name}-test.jar"
+    <jar jarfile="${build.dir}/${test.final.name}.jar"
          basedir="${test.build.classes}">
          <manifest>
            <attribute name="Main-Class"
@@ -699,65 +828,261 @@
     </jar>
   </target>
 
+  <!-- ================================================================== -->
+  <!-- Fault injection customization section.
+       These targets ought to be copied over to other projects and modified
+       as needed -->
+  <!-- ================================================================== -->
+  <target name="-classes-compilation" depends="compile-core-classes,
+      compile-hdfs-classes, compile-mapred-classes, compile-core-test"/>
+  <target name="run-test-core-fault-inject" depends="injectfaults"
+	  description="Run full set of the unit tests with fault injection">
+    <macro-run-tests-fault-inject target.name="test-core"
+      testcasesonly="false"/>
+  </target>
+
+  <target name="jar-test-fault-inject" depends="injectfaults"
+    description="Make hadoop-test-fi.jar">
+    <macro-jar-test-fault-inject
+      target.name="jar-test"
+      jar.final.name="test.final.name"
+      jar.final.value="${test.final.name}-fi" />
+  </target>
+
+  <target name="jar-fault-inject" depends="injectfaults"
+    description="Make hadoop-fi.jar">
+    <macro-jar-fault-inject
+      target.name="jar"
+      build.dir="${build-fi.dir}"
+      jar.final.name="final.name"
+      jar.final.value="${final.name}-fi" />
+  </target>
+
+  <!--This target is not included in the top level list of targets
+  for it serves a special "regression" testing purpose of non-FI tests in
+  an FI environment -->
+  <target name="run-fault-inject-with-testcaseonly" depends="injectfaults">
+    <fail unless="testcase">Can't run this target without -Dtestcase setting!
+    </fail>
+    <macro-run-tests-fault-inject target.name="test-core"
+      testcasesonly="true"/>
+  </target>
+  <!-- ================================================================== -->
+  <!-- End of Fault injection customization section                       -->
+  <!-- ================================================================== -->
+
+  <condition property="tests.notestcase">
+    <and>
+      <isfalse value="${test.fault.inject}"/>
+      <not>
+        <isset property="testcase"/>
+      </not>
+    </and>
+  </condition>
+  <condition property="tests.notestcase.fi">
+    <and>
+      <not>
+        <isset property="testcase" />
+      </not>
+      <istrue value="${test.fault.inject}" />
+    </and>
+  </condition>
+  <condition property="tests.testcase">
+    <and>
+      <isfalse value="${test.fault.inject}" />
+      <isset property="testcase" />
+    </and>
+  </condition>
+  <condition property="tests.testcase.fi">
+    <and>
+      <istrue value="${test.fault.inject}" />
+      <isset property="testcase" />
+    </and>
+  </condition>
+  <!-- ================================================================== -->
+  <!-- Define exclude lists for different kinds of testing -->
+  <!-- ================================================================== -->
+  <patternset id="empty.exclude.list.id" />
+    <patternset id="commit.smoke.exclude.list.id">
+    <excludesfile name="${test.commit.tests.file}"/>
+    <excludesfile name="${test.smoke.tests.file}"/>
+  </patternset>
+
   <!-- ================================================================== -->
   <!-- Run unit tests                                                     --> 
   <!-- ================================================================== -->
-  <target name="test-core" depends="jar-test" description="Run core unit tests">
-
-    <delete dir="${test.build.data}"/>
-    <mkdir dir="${test.build.data}"/>
-    <delete dir="${test.log.dir}"/>
-    <mkdir dir="${test.log.dir}"/>
-  	<copy file="${test.src.dir}/hadoop-policy.xml" 
-  	  todir="${test.build.extraconf}" />
-    <junit showoutput="${test.output}"
-      printsummary="${test.junit.printsummary}"
-      haltonfailure="${test.junit.haltonfailure}"
-      fork="yes"
-      forkmode="${test.junit.fork.mode}"
-      maxmemory="${test.junit.maxmemory}"
-      dir="${basedir}" timeout="${test.timeout}"
-      errorProperty="tests.failed" failureProperty="tests.failed">
-      <sysproperty key="test.build.data" value="${test.build.data}"/>
-      <sysproperty key="test.cache.data" value="${test.cache.data}"/>    	
-      <sysproperty key="test.debug.data" value="${test.debug.data}"/>
-      <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>
-      <sysproperty key="test.src.dir" value="${test.src.dir}"/>
-      <sysproperty key="test.build.extraconf" value="${test.build.extraconf}" />
-      <sysproperty key="hadoop.policy.file" value="hadoop-policy.xml"/>
-      <sysproperty key="java.library.path"
-       value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
-      <sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
-      <!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
-	  <syspropertyset dynamic="no">
-		  <propertyref name="io.compression.codec.lzo.class"/>
-	  </syspropertyset>
-      <!-- set compile.c++ in the child jvm only if it is set -->
-      <syspropertyset dynamic="no">
-         <propertyref name="compile.c++"/>
-      </syspropertyset>
-      <classpath refid="${test.classpath.id}"/>
-      <formatter type="${test.junit.output.format}" />
-      <batchtest todir="${test.build.dir}" unless="testcase">
-        <fileset dir="${test.src.dir}"
-	         includes="**/${test.include}.java"
-		 excludes="**/${test.exclude}.java" />
-      </batchtest>
-      <batchtest todir="${test.build.dir}" if="testcase">
-        <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
-      </batchtest>
-    </junit>
-    <fail if="tests.failed">Tests failed!</fail>
+  <macrodef name="macro-test-runner">
+    <attribute name="test.file" />
+    <attribute name="classpath" />
+    <attribute name="test.dir" />
+    <attribute name="fileset.dir" />
+    <attribute name="hadoop.conf.dir.deployed" default="" />
+    <attribute name="test.krb5.conf" default="" />
+    <attribute name="test.krb5.conf.filename" default="" />
+    <attribute name="exclude.list.id" default="empty.exclude.list.id" />
+    <sequential>
+      <delete file="${test.build.dir}/testsfailed"/>
+      <delete dir="@{test.dir}/data" />
+      <mkdir dir="@{test.dir}/data" />
+      <delete dir="@{test.dir}/logs" />
+      <mkdir dir="@{test.dir}/logs" />
+      <copy file="${test.src.dir}/hadoop-policy.xml"
+            todir="@{test.dir}/extraconf" />
+      <copy file="${test.src.dir}/fi-site.xml"
+            todir="@{test.dir}/extraconf" />
+      <junit showoutput="${test.output}"
+             printsummary="${test.junit.printsummary}"
+             haltonfailure="${test.junit.haltonfailure}"
+             fork="yes"
+             forkmode="${test.junit.fork.mode}"
+             maxmemory="${test.junit.maxmemory}"
+             dir="${basedir}"
+             timeout="${test.timeout}"
+             errorProperty="tests.failed"
+             failureProperty="tests.failed">
+        <sysproperty key="test.build.data" value="${test.build.data}" />
+        <sysproperty key="test.tools.input.dir"
+                     value="${test.tools.input.dir}" />
+        <sysproperty key="test.cache.data" value="${test.cache.data}" />
+        <sysproperty key="test.debug.data" value="${test.debug.data}" />
+        <sysproperty key="hadoop.log.dir" value="${test.log.dir}" />
+        <sysproperty key="test.src.dir" value="${test.src.dir}" />
+        <sysproperty key="taskcontroller-path" value="${taskcontroller-path}" />
+        <sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}" />
+        <sysproperty key="test.build.extraconf"
+                     value="@{test.dir}/extraconf" />
+        <sysproperty key="@{test.krb5.conf}"
+                     value="@{test.krb5.conf.filename}"/>
+        <sysproperty key="hadoop.policy.file" value="hadoop-policy.xml" />
+        <sysproperty key="java.library.path"
+                     value="${build.native}/lib:${lib.dir}/native/${build.platform}:${lib.file.path}" />
+        <sysproperty key="install.c++.examples"
+                     value="${install.c++.examples}" />
+        <sysproperty key="testjar"
+                     value="@{test.dir}/testjar" />
+        <!-- System properties that are specifically set for system tests -->
+        <sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
+                     value="@{hadoop.conf.dir.deployed}" />
+        <!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="io.compression.codec.lzo.class" />
+        </syspropertyset>
+        <!-- set compile.c++ in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="compile.c++" />
+        </syspropertyset>
+        <classpath refid="@{classpath}" />
+        <syspropertyset id="FaultProbabilityProperties">
+          <propertyref regex="fi.*" />
+        </syspropertyset>
+        <formatter type="${test.junit.output.format}" />
+        <batchtest todir="@{test.dir}" if="tests.notestcase">
+          <fileset dir="@{fileset.dir}"
+                   excludes="**/${test.exclude}.java aop/** system/**">
+            <patternset>
+              <includesfile name="@{test.file}"/>
+            </patternset>
+            <patternset refid="@{exclude.list.id}"/>
+          </fileset>
+        </batchtest>
+        <batchtest todir="${test.build.dir}" if="tests.notestcase.fi">
+          <fileset dir="${test.src.dir}/aop"
+                   includes="**/${test.include}.java"
+                   excludes="**/${test.exclude}.java" />
+        </batchtest>
+        <batchtest todir="@{test.dir}" if="tests.testcase">
+          <fileset dir="@{fileset.dir}"
+            includes="**/${testcase}.java" excludes="aop/** system/**"/>
+        </batchtest>
+        <batchtest todir="${test.build.dir}" if="tests.testcase.fi">
+          <fileset dir="${test.src.dir}/aop" includes="**/${testcase}.java" />
+        </batchtest>
+        <!--The following batch is for the special case only where
+                non-FI tests need to be executed against an FI environment -->
+        <batchtest todir="${test.build.dir}" if="tests.testcaseonly">
+          <fileset dir="${test.src.dir}" includes="**/${testcase}.java" />
+        </batchtest>
+      </junit>
+      <antcall target="checkfailure"/>
+    </sequential>
+  </macrodef>
+
+  <target name="test-core" depends="test-commit, test-smoke,
+    test-core-excluding-commit-and-smoke,
+    test-core-all-withtestcaseonly, jar-test"
+    description="Run core unit tests">
+  </target>
+
+  <target name="test-core-all-withtestcaseonly" depends="jar-test" if="testcase">
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf="java.security.krb5.conf"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>
+
+  <target name="test-core-excluding-commit-and-smoke" depends="jar-test"
+    unless="testcase">
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf="java.security.krb5.conf"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       exclude.list.id="commit.smoke.exclude.list.id"
+                       >
+    </macro-test-runner>
   </target>   
 
+  <target name="test-commit" depends="jar-test" 
+    description="Run approx 10-minute set of unit tests prior to commiting"
+    unless="testcase">
+    <macro-test-runner test.file="${test.commit.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf="java.security.krb5.conf"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>
+
+  <target name="test-smoke" depends="jar-test"
+    description="Run approx 30-minute set of functional tests prior to
+      guarantee that the build is not DOA" unless="testcase">
+    <macro-test-runner test.file="${test.smoke.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf="java.security.krb5.conf"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>
+
+  <target name="checkfailure" if="tests.failed">
+    <touch file="${test.build.dir}/testsfailed"/>
+    <fail unless="continueOnFailure">Tests failed!</fail>
+  </target>
+
   <target name="test-contrib" depends="compile, compile-core-test" description="Run contrib unit tests">
     <subant target="test">
        <property name="version" value="${version}"/>
+       <property name="clover.jar" value="${clover.jar}"/>
        <fileset file="${contrib.dir}/build.xml"/>
     </subant> 
   </target>
 	  
-  <target name="test" depends="test-core, test-contrib" description="Run core, contrib unit tests">
+  <target name="test" description="Run core, contrib, fault injection tests">
+    <delete file="${test.build.dir}/testsfailed"/>
+    <property name="continueOnFailure" value="true"/>
+    <antcall target="test-core"/>
+    <antcall target="test-contrib"/>
+    <available file="${test.build.dir}/testsfailed" property="testsfailed"/>
+    <fail if="testsfailed">Tests failed!</fail>
   </target>
 
   <!-- Run all unit tests, not just Test*, and use non-test configuration. -->
@@ -828,10 +1153,10 @@
       <sourcePath path="${examples.dir}" />
       <sourcePath path="${tools.src}" />
       <sourcePath path="${basedir}/src/contrib/streaming/src/java" />
-      <class location="${basedir}/build/${final.name}-core.jar" />
-      <class location="${basedir}/build/${final.name}-examples.jar" />
-      <class location="${basedir}/build/${final.name}-tools.jar" />
-      <class location="${basedir}/build/contrib/streaming/${final.name}-streaming.jar" />
+      <class location="${build.dir}/${core.final.name}.jar" />
+      <class location="${build.dir}/${examples.final.name}.jar" />
+      <class location="${build.dir}/${tools.final.name}.jar" />
+      <class location="${build.dir}/contrib/streaming/${streaming.final.name}.jar" />
     </findbugs>
 
         <xslt style="${findbugs.home}/src/xsl/default.xsl"
@@ -907,6 +1232,7 @@
       windowtitle="${Name} ${version} API"
       doctitle="${Name} ${version} Developer API"
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}"
       >
         <packageset dir="${core.src.dir}"/>
         <packageset dir="${mapred.src.dir}"/>
@@ -949,6 +1275,7 @@
       windowtitle="${Name} ${version} API"
       doctitle="${Name} ${version} API"
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}"
       >
         <packageset dir="${core.src.dir}"/>
         <packageset dir="${mapred.src.dir}"/>
@@ -982,7 +1309,7 @@
   </target>	
 
   <target name="api-xml" depends="ivy-retrieve-jdiff,javadoc,write-null">
-    <javadoc>
+    <javadoc maxmemory="${javadoc.maxmemory}">
        <doclet name="jdiff.JDiff"
                path="${jdiff.jar}:${xerces.jar}">
          <param name="-apidir" value="${jdiff.xml.dir}"/>
@@ -1009,7 +1336,8 @@
     <mkdir dir="${jdiff.build.dir}"/>
     <javadoc sourcepath="src/core,src/hdfs,src,mapred,src/tools"
              destdir="${jdiff.build.dir}"
-             sourceFiles="${jdiff.home}/Null.java">
+	     sourceFiles="${jdiff.home}/Null.java"
+	     maxmemory="${javadoc.maxmemory}">
        <doclet name="jdiff.JDiff"
                path="${jdiff.jar}:${xerces.jar}">
          <param name="-oldapi" value="hadoop ${jdiff.stable}"/>
@@ -1043,7 +1371,7 @@
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="package" depends="compile, jar, javadoc, docs, cn-docs, api-report, examples, tools-jar, jar-test, ant-tasks, package-librecordio"
+  <target name="package" depends="compile, jar, javadoc, docs, cn-docs, api-report, examples, tools-jar, jar-test, ant-tasks, package-librecordio, jsvc"
 	  description="Build distribution">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
@@ -1083,7 +1411,7 @@
     </copy>
 
     <copy todir="${dist.dir}"> 
-      <fileset file="${build.dir}/${final.name}-*.jar"/>
+      <fileset file="${build.dir}/${name}-*-${version}.jar"/>
     </copy>
     
     <copy todir="${dist.dir}/bin">
@@ -1156,7 +1484,7 @@
     </macro_tar>
   </target>
 
-  <target name="bin-package" depends="compile, jar, examples, tools-jar, jar-test, ant-tasks, package-librecordio" 
+  <target name="bin-package" depends="compile, jar, examples, tools-jar, jar-test, ant-tasks, package-librecordio, jsvc" 
 		description="assembles artifacts for binary target">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
@@ -1193,7 +1521,7 @@
     </copy>
 
     <copy todir="${dist.dir}"> 
-      <fileset file="${build.dir}/${final.name}-*.jar"/>
+      <fileset file="${build.dir}/${name}-*-${version}.jar"/>
     </copy>
     
     <copy todir="${dist.dir}/bin">
@@ -1227,6 +1555,32 @@
     </chmod>
   </target>
 
+  <target name="binary-system" depends="bin-package, jar-system, jar-test-system"
+     description="make system test package for deployment">
+    <copy todir="${system-test-build-dir}/${final.name}">
+      <fileset dir="${dist.dir}">
+      </fileset>
+    </copy>
+    <copy todir="${system-test-build-dir}/${final.name}" 
+      file="${system-test-build-dir}/${core.final.name}.jar" overwrite="true"/>
+    <copy todir="${system-test-build-dir}/${final.name}"
+      file="${system-test-build-dir}/${test.final.name}.jar" overwrite="true"/>
+    <macro_tar 
+      param.destfile="${system-test-build-dir}/${final.name}-bin.tar.gz">
+        <param.listofitems>
+          <tarfileset dir="${system-test-build-dir}" mode="664">
+            <exclude name="${final.name}/bin/*" />
+            <exclude name="${final.name}/src/**" />
+            <exclude name="${final.name}/docs/**" />
+            <include name="${final.name}/**" />
+          </tarfileset>
+          <tarfileset dir="${build.dir}" mode="755">
+            <include name="${final.name}/bin/*" />
+          </tarfileset>
+        </param.listofitems>
+      </macro_tar>
+  </target>
+  
   <target name="binary" depends="bin-package" description="Make tarball without source and documentation">
     <macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
       <param.listofitems>
@@ -1257,12 +1611,29 @@
   <!-- ================================================================== -->
   <!-- Clean.  Delete the build files, and their directories              -->
   <!-- ================================================================== -->
-  <target name="clean" depends="clean-contrib" description="Clean.  Delete the build files, and their directories">
+  <target name="clean" depends="clean-contrib, clean-sign, clean-fi" description="Clean.  Delete the build files, and their directories">
     <delete dir="${build.dir}"/>
     <delete dir="${docs.src}/build"/>
     <delete dir="${src.docs.cn}/build"/>
+    <delete file="${basedir}/ivy/hadoop-core-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-test-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-examples-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-tools-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-streaming-pom.xml"/>
+  </target>
+
+  <target name="clean-sign" description="Clean.  Delete .asc files">
+    <delete>
+      <fileset dir="." includes="**/**/*.asc"/>
+    </delete>
+  </target>  
+ 
+  <target name="veryclean" depends="clean" description="Delete mvn ant task jar and ivy ant taks jar">
+    <delete file="${ant_task.jar}"/>
+    <delete file="${ivy.jar}"/>
   </target>
 
+
   <!-- ================================================================== -->
   <!-- Clean contrib target. For now, must be called explicitly           -->
   <!-- Using subant instead of ant as a workaround for 30569              -->
@@ -1487,7 +1858,7 @@
   <target name="ant-tasks" depends="jar, compile-ant-tasks">
     <copy file="${anttasks.dir}/org/apache/hadoop/ant/antlib.xml"
           todir="${build.anttasks}/org/apache/hadoop/ant"/>
-    <jar destfile="${build.dir}/${final.name}-ant.jar">
+    <jar destfile="${build.dir}/${ant.final.name}.jar">
       <fileset dir="${build.anttasks}"/>
     </jar>
   </target>
@@ -1601,7 +1972,6 @@
     <mkdir dir="${build.ivy.dir}" />
     <mkdir dir="${build.ivy.lib.dir}" />
     <mkdir dir="${build.ivy.report.dir}" />
-    <mkdir dir="${build.ivy.maven.dir}" />
   </target>
 
   <target name="ivy-probe-antlib" >
@@ -1731,70 +2101,226 @@
     </echo>
   </target>
 
-  <target name="assert-hadoop-jar-exists" depends="ivy-init">
-    <fail>
-      <condition >
-        <not>
-          <available file="${hadoop.jar}" />
-        </not>
-      </condition>
-      Not found: ${hadoop.jar}
-      Please run the target "jar" in the main build file
-    </fail>
-
-  </target>
-
-  <target name="ready-to-publish" depends="jar,assert-hadoop-jar-exists,ivy-resolve"/>
-
-  <target name="ivy-publish-local" depends="ready-to-publish,ivy-resolve">
-    <ivy:publish
-      settingsRef="${ant.project.name}.ivy.settings"
-      resolver="local"
-      pubrevision="${hadoop.version}"
-      overwrite="true"
-      artifactspattern="${build.dir}/${ivy.publish.pattern}" />
+  <target name="ant-task-download" description="To download mvn-ant-task">
+    <get src="${ant_task_repo_url}" dest="${ant_task.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="mvn-taskdef" depends="ant-task-download">
+     <path id="mvn-ant-task.classpath" path="${ant_task.jar}"/>
+     <typedef resource="org/apache/maven/artifact/ant/antlib.xml"
+         uri="urn:maven-artifact-ant"
+         classpathref="mvn-ant-task.classpath"/>
+  </target>  
+
+  <target name="mvn-install" depends="mvn-taskdef,bin-package,set-version"
+     description="To install hadoop core and test jars to local filesystem's m2 cache">
+     <artifact:pom file="${hadoop-core.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-test.pom}" id="hadoop.test"/>
+     <artifact:pom file="${hadoop-examples.pom}" id="hadoop.examples"/>
+     <artifact:pom file="${hadoop-tools.pom}" id="hadoop.tools"/>
+     <artifact:pom file="${hadoop-streaming.pom}" id="hadoop.streaming"/>
+
+     <artifact:install file="${hadoop-core.jar}">
+        <pom refid="hadoop.core"/>
+     </artifact:install>
+     <artifact:install file="${hadoop-test.jar}">
+        <pom refid="hadoop.test"/>
+     </artifact:install>
+     <artifact:install file="${hadoop-tools.jar}">
+        <pom refid="hadoop.tools"/>
+     </artifact:install>
+     <artifact:install file="${hadoop-examples.jar}">
+        <pom refid="hadoop.examples"/>
+     </artifact:install>
+     <artifact:install file="${hadoop-streaming.jar}">
+        <pom refid="hadoop.streaming"/>
+     </artifact:install>
+  </target>
+
+  <target name="mvn-deploy" depends="mvn-taskdef, bin-package, set-version, signanddeploy, simpledeploy"
+     description="To deploy hadoop core and test jar's to apache maven repository"/>
+
+  <target name="signanddeploy" if="staging" depends="sign">
+     <artifact:pom file="${hadoop-core.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-test.pom}" id="hadoop.core.test"/>
+     <artifact:pom file="${hadoop-examples.pom}" id="hadoop.examples"/>
+     <artifact:pom file="${hadoop-tools.pom}" id="hadoop.tools"/>
+     <artifact:pom file="${hadoop-streaming.pom}" id="hadoop.streaming"/>
+     <artifact:install-provider artifactId="wagon-http"
+       version="${wagon-http.version}"/>
+     <artifact:deploy file="${hadoop-core.jar}">
+         <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+         <pom refid="hadoop.core"/>
+         <attach file="${hadoop-core.jar}.asc" type="jar.asc"/>
+         <attach file="${hadoop-core.pom}.asc" type="pom.asc"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-test.jar}">
+         <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/> 
+         <pom refid="hadoop.core.test"/>
+         <attach file="${hadoop-test.jar}.asc" type="jar.asc"/>
+         <attach file="${hadoop-test.pom}.asc" type="pom.asc"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-tools.jar}">
+         <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/> 
+         <pom refid="hadoop.tools"/>
+         <attach file="${hadoop-tools.jar}.asc" type="jar.asc"/>
+         <attach file="${hadoop-tools.pom}.asc" type="pom.asc"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-examples.jar}">
+         <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/> 
+         <pom refid="hadoop.examples"/>
+         <attach file="${hadoop-examples.jar}.asc" type="jar.asc"/>
+         <attach file="${hadoop-examples.pom}.asc" type="pom.asc"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-streaming.jar}">
+         <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/> 
+         <pom refid="hadoop.streaming"/>
+         <attach file="${hadoop-streaming.jar}.asc" type="jar.asc"/>
+         <attach file="${hadoop-streaming.pom}.asc" type="pom.asc"/>
+     </artifact:deploy>
+  </target>
+
+  <target name="sign" depends="clean-sign" if="staging">
+    <input message="password:>" addproperty="gpg.passphrase">
+     <handler classname="org.apache.tools.ant.input.SecureInputHandler" />
+    </input>
+    <macrodef name="sign-artifact" description="Signs the artifact">
+      <attribute name="input.file"/>
+      <attribute name="output.file" default="@{input.file}.asc"/>
+      <attribute name="gpg.passphrase"/>
+      <sequential>
+        <echo>Signing @{input.file} Sig File: @{output.file}</echo>
+        <exec executable="gpg" >
+          <arg value="--armor"/>
+          <arg value="--output"/>
+          <arg value="@{output.file}"/>
+          <arg value="--passphrase"/>
+          <arg value="@{gpg.passphrase}"/>
+          <arg value="--detach-sig"/>
+          <arg value="@{input.file}"/>
+        </exec>
+      </sequential>
+    </macrodef>
+    <sign-artifact input.file="${hadoop-core.jar}" 
+     output.file="${hadoop-core.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-test.jar}" 
+     output.file="${hadoop-test.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-tools.jar}" 
+     output.file="${hadoop-tools.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-examples.jar}" 
+     output.file="${hadoop-examples.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-streaming.jar}" 
+     output.file="${hadoop-streaming.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-core.pom}" 
+     output.file="${hadoop-core.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-test.pom}" 
+     output.file="${hadoop-test.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-tools.pom}" 
+     output.file="${hadoop-tools.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-examples.pom}" 
+     output.file="${hadoop-examples.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-streaming.pom}" 
+     output.file="${hadoop-streaming.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+  </target>
+
+  <target name="simpledeploy" unless="staging">
+     <artifact:pom file="${hadoop-core.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-test.pom}" id="hadoop.test"/>
+     <artifact:pom file="${hadoop-examples.pom}" id="hadoop.examples"/>
+     <artifact:pom file="${hadoop-tools.pom}" id="hadoop.tools"/>
+     <artifact:pom file="${hadoop-streaming.pom}" id="hadoop.streaming"/>
+
+     <artifact:install-provider artifactId="wagon-http" version="${wagon-http.version}"/>
+     <artifact:deploy file="${hadoop-core.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-test.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.test"/>
+     </artifact:deploy> 
+     <artifact:deploy file="${hadoop-examples.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.examples"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-tools.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.tools"/>
+     </artifact:deploy>
+     <artifact:deploy file="${hadoop-streaming.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.streaming"/>
+     </artifact:deploy>
+  </target>
+
+  <target name="set-version">
+    <delete file="${hadoop-core.pom}"/>
+    <delete file="${hadoop-test.pom}"/>
+    <delete file="${hadoop-examples.pom}"/>
+    <delete file="${hadoop-tools.pom}"/>
+    <delete file="${hadoop-streaming.pom}"/>
+    <copy file="${hadoop-core-pom-template.xml}" tofile="${hadoop-core.pom}"/>
+    <copy file="${hadoop-test-pom-template.xml}" tofile="${hadoop-test.pom}"/>
+    <copy file="${hadoop-examples-pom-template.xml}" tofile="${hadoop-examples.pom}"/>
+    <copy file="${hadoop-tools-pom-template.xml}" tofile="${hadoop-tools.pom}"/>
+    <copy file="${hadoop-streaming-pom-template.xml}" tofile="${hadoop-streaming.pom}"/>
+    <replaceregexp byline="true">
+      <regexp pattern="@version"/>
+      <substitution expression="${version}"/>
+      <fileset dir="${basedir}/ivy">
+        <include name="hadoop-core-pom.xml"/>
+        <include name="hadoop-test-pom.xml"/>
+        <include name="hadoop-tools-pom.xml"/>
+        <include name="hadoop-examples-pom.xml"/>
+        <include name="hadoop-streaming-pom.xml"/>
+      </fileset>
+    </replaceregexp>
   </target>
 
-
-  <!-- this is here for curiosity, to see how well the makepom task works
-  Answer: it depends whether you want transitive dependencies excluded or not
-  -->
-  <target name="makepom" depends="ivy-resolve">
-    <ivy:makepom settingsRef="${ant.project.name}.ivy.settings"
-      ivyfile="ivy.xml"
-      pomfile="${build.ivy.maven.dir}/generated.pom">
-      <ivy:mapping conf="default" scope="default"/>
-      <ivy:mapping conf="master" scope="master"/>
-      <ivy:mapping conf="runtime" scope="runtime"/>
-    </ivy:makepom>
+  <!-- taskcontroller targets -->
+  <target name="task-controller" depends="init">
+    <exec executable="autoreconf" 
+          dir="${c++.task-controller.src}"
+          searchpath="yes" failonerror="yes">
+      <arg value="-i"/>
+    </exec>
+    <mkdir dir="${build.c++.task-controller}" />
+    <exec executable="${c++.task-controller.src}/configure"
+          dir="${build.c++.task-controller}">
+      <arg value="--prefix=${task-controller.prefix.dir}"/>
+      <env key="CFLAGS" 
+           value="-DHADOOP_CONF_DIR=${hadoop.conf.dir}"/>
+    </exec>
+    <!-- delete main in case HADOOP_CONF_DIR is different -->
+    <delete file="${build.c++.task-controller}/impl/main.o"
+            quiet="true" failonerror="false"/>
+    <exec executable="make"
+          dir="${build.c++.task-controller}"
+          searchpath="yes" failonerror="yes">
+      <arg value="install"/>
+    </exec>
   </target>
 
-
-  <target name="copy-jar-to-maven" depends="ready-to-publish">
-    <copy file="${hadoop.jar}"
-      tofile="${build.ivy.maven.jar}"/>
-    <checksum file="${build.ivy.maven.jar}" algorithm="md5"/>
+  <target name="test-task-controller" depends="init,task-controller">
+    <exec executable="make"
+          dir="${build.c++.task-controller}"
+          searchpath="yes" failonerror="yes">
+      <arg value="check"/>
+    </exec>
   </target>
 
-  <target name="copypom" depends="ivy-init-dirs">
+  <!-- end of task-controller targets -->
 
-   <presetdef name="expandingcopy" >
-    <copy overwrite="true">
-      <filterchain>
-        <expandproperties/>
-      </filterchain>
-    </copy>
-   </presetdef>
+  <target name="jsvc" >
+    <mkdir dir="${jsvc.build.dir}" />
+    <get src="${jsvc.location}" dest="${jsvc.build.dir}/${jsvc.dest.name}" />
 
-   <expandingcopy file="ivy/hadoop-core.pom"
-      tofile="${build.ivy.maven.pom}"/>
-   <checksum file="${build.ivy.maven.pom}" algorithm="md5"/>
-  </target>
-
-  <target name="maven-artifacts" depends="copy-jar-to-maven,copypom" />
-
-  <target name="published" depends="ivy-publish-local,maven-artifacts">
+    <untar compression="gzip" src="${jsvc.build.dir}/${jsvc.dest.name}" dest="${jsvc.build.dir}" />
 
-  </target>
+    <copy file="${jsvc.build.dir}/jsvc" todir="${jsvc.install.dir}" verbose="true" />
+    <chmod perm="ugo+x" type="file">
+      <fileset file="${jsvc.install.dir}/jsvc"/>
+    </chmod>
+ </target>
 
 </project>
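To illustrate the macrodef introduced above, here is a minimal sketch (not part of the patch) of how macro-test-runner could be reused by an additional suite target. The target name test-nightly and the property test.nightly.tests.file are hypothetical; the attribute names and the other property references match the definitions in this build.xml:

  <target name="test-nightly" depends="jar-test" unless="testcase">
    <!-- test.file points at a hypothetical list file of tests to include -->
    <macro-test-runner test.file="${test.nightly.tests.file}"
                       classpath="${test.classpath.id}"
                       test.dir="${test.build.dir}"
                       fileset.dir="${test.src.dir}"
                       exclude.list.id="empty.exclude.list.id"/>
  </target>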

+ 89 - 9
conf/capacity-scheduler.xml.template

@@ -7,6 +7,14 @@
 
 <configuration>
 
+  <property>
+    <name>mapred.capacity-scheduler.maximum-system-jobs</name>
+    <value>3000</value>
+    <description>Maximum number of jobs in the system which can be initialized
+     concurrently by the CapacityScheduler.
+    </description>    
+  </property>
+  
   <property>
     <name>mapred.capacity-scheduler.queue.default.capacity</name>
     <value>100</value>
@@ -15,6 +23,25 @@
     </description>    
   </property>
   
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
+    <value>-1</value>
+    <description>
+        maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
+        This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
+        The maximum-capacity of a queue must be greater than or equal to its minimum capacity.
+        The default value of -1 implies a queue can use the complete capacity of the cluster.
+
+        This property can be used to prevent long-running jobs from occupying more than a
+        certain percentage of the cluster, which, in the absence of pre-emption, could affect
+        the capacity guarantees of other queues.
+
+        Note that maximum-capacity is a percentage, so the absolute limit scales with the
+        cluster's capacity: if nodes or racks are added to the cluster, the maximum capacity
+        in absolute terms increases accordingly.
+    </description>    
+  </property>
+  
   <property>
     <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
     <value>false</value>
@@ -38,14 +65,41 @@
     value of 100 implies no user limits are imposed. 
     </description>
   </property>
+  
   <property>
-    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
-    <value>2</value>
-    <description>The maximum number of jobs to be pre-initialized for a user
-    of the job queue.
+    <name>mapred.capacity-scheduler.queue.default.user-limit-factor</name>
+    <value>1</value>
+    <description>The multiple of the queue capacity which can be configured to 
+    allow a single user to acquire more slots. 
     </description>
   </property>
-  
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks</name>
+    <value>200000</value>
+    <description>The maximum number of tasks, across all jobs in the queue, 
+    which can be initialized concurrently. Once the queue's jobs exceed this 
+    limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The maximum number of tasks per-user, across all of the 
+    user's jobs in the queue, which can be initialized concurrently. Once the 
+    user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.queue.default.init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The multiple of (maximum-system-jobs * queue-capacity) used to 
+    determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
+
   <!-- The default configuration settings for the capacity task scheduler -->
   <!-- The default values would be applied to all the queues which don't have -->
   <!-- the appropriate property for the particular queue -->
@@ -65,14 +119,40 @@
     </description>
   </property>
 
+
+  <property>
+    <name>mapred.capacity-scheduler.default-user-limit-factor</name>
+    <value>1</value>
+    <description>The default multiple of queue-capacity which is used to 
+    determine the number of slots a single user can consume concurrently.
+    </description>
+  </property>
+
   <property>
-    <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
-    <value>2</value>
-    <description>The maximum number of jobs to be pre-initialized for a user
-    of the job queue.
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-queue</name>
+    <value>200000</value>
+    <description>The default maximum number of tasks, across all jobs in the 
+    queue, which can be initialized concurrently. Once the queue's jobs exceed 
+    this limit they will be queued on disk.  
     </description>
   </property>
 
+  <property>
+    <name>mapred.capacity-scheduler.default-maximum-active-tasks-per-user</name>
+    <value>100000</value>
+    <description>The default maximum number of tasks per-user, across all of 
+    the user's jobs in the queue, which can be initialized concurrently. Once 
+    the user's jobs exceed this limit they will be queued on disk.  
+    </description>
+  </property>
+
+  <property>
+    <name>mapred.capacity-scheduler.default-init-accept-jobs-factor</name>
+    <value>10</value>
+    <description>The default multiple of (maximum-system-jobs * queue-capacity) 
+    used to determine the number of jobs which are accepted by the scheduler.  
+    </description>
+  </property>
 
   <!-- Capacity scheduler Job Initialization configuration parameters -->
   <property>

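To make the percentage semantics above concrete: with capacity=20 and maximum-capacity=40, a queue is guaranteed 20% of the cluster's slots and may use idle capacity up to 40%; on a 1,000-slot cluster that is a cap of 400 slots, which grows to 800 if the cluster doubles. Below is a hedged illustration (not part of the patch) for a hypothetical queue named "research", following the per-queue property pattern shown for the default queue:

  <!-- hypothetical queue: guaranteed 20% of the cluster, capped at 40% -->
  <property>
    <name>mapred.capacity-scheduler.queue.research.capacity</name>
    <value>20</value>
  </property>
  <property>
    <name>mapred.capacity-scheduler.queue.research.maximum-capacity</name>
    <value>40</value>
  </property>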
+ 0 - 40
conf/hadoop-metrics.properties

@@ -1,40 +0,0 @@
-# Configuration of the "dfs" context for null
-dfs.class=org.apache.hadoop.metrics.spi.NullContext
-
-# Configuration of the "dfs" context for file
-#dfs.class=org.apache.hadoop.metrics.file.FileContext
-#dfs.period=10
-#dfs.fileName=/tmp/dfsmetrics.log
-
-# Configuration of the "dfs" context for ganglia
-# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# dfs.period=10
-# dfs.servers=localhost:8649
-
-
-# Configuration of the "mapred" context for null
-mapred.class=org.apache.hadoop.metrics.spi.NullContext
-
-# Configuration of the "mapred" context for file
-#mapred.class=org.apache.hadoop.metrics.file.FileContext
-#mapred.period=10
-#mapred.fileName=/tmp/mrmetrics.log
-
-# Configuration of the "mapred" context for ganglia
-# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# mapred.period=10
-# mapred.servers=localhost:8649
-
-
-# Configuration of the "jvm" context for null
-jvm.class=org.apache.hadoop.metrics.spi.NullContext
-
-# Configuration of the "jvm" context for file
-#jvm.class=org.apache.hadoop.metrics.file.FileContext
-#jvm.period=10
-#jvm.fileName=/tmp/jvmmetrics.log
-
-# Configuration of the "jvm" context for ganglia
-# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
-# jvm.period=10
-# jvm.servers=localhost:8649

+ 16 - 0
conf/hadoop-metrics2.properties.example

@@ -0,0 +1,16 @@
+# syntax: [prefix].[source|sink|jmx].[instance].[options]
+# See package.html for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+
+#namenode.sink.file.filename=namenode-metrics.out
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+#jobtracker.sink.file.filename=jobtracker-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out

+ 9 - 0
conf/hadoop-policy.xml.template

@@ -94,4 +94,13 @@
     A special value of "*" means all users are allowed.</description>
   </property>
 
+  <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AdminOperationsProtocol, used by the mradmin commands
+    to refresh queues and nodes at the JobTracker. The ACL is a comma-separated list of 
+    user and group names. The user list and the group list are separated by a blank, 
+    e.g. "alice,bob users,wheel". A special value of "*" means all users are 
+    allowed.</description>
+  </property>
 </configuration>
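As a hedged illustration only (not part of the patch), the new ACL could be narrowed from "*" to a specific admin user and group; the names below are hypothetical, and per the description the user list and the group list are separated by a single blank:

  <!-- hypothetical: user mradmin plus group hadoopadmins may run admin operations -->
  <property>
    <name>security.admin.operations.protocol.acl</name>
    <value>mradmin hadoopadmins</value>
  </property>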

+ 38 - 1
conf/log4j.properties

@@ -3,6 +3,16 @@ hadoop.root.logger=INFO,console
 hadoop.log.dir=.
 hadoop.log.file=hadoop.log
 
+#
+# Job Summary Appender 
+#
+# Use the following logger to send the job summary to a separate file, defined by 
+# hadoop.mapreduce.jobsummary.log.file and rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+
 # Define the root logger to the system property "hadoop.root.logger".
 log4j.rootLogger=${hadoop.root.logger}, EventCounter
 
@@ -45,6 +55,7 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}:
 
 #Default values
 hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
 hadoop.tasklog.noKeepSplits=4
 hadoop.tasklog.totalLogFileSize=100
 hadoop.tasklog.purgeLogSplits=true
@@ -52,11 +63,24 @@ hadoop.tasklog.logsRetainHours=12
 
 log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
 log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
 log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
 
 log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
 log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
 
+#
+#Security audit appender
+#
+hadoop.security.log.file=SecurityAuth.audit
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+#new logger
+log4j.category.SecurityLogger=INFO,DRFAS
+
 #
 # Rolling File Appender
 #
@@ -80,9 +104,11 @@ log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
 
 # Custom Logging levels
 
+hadoop.metrics.log.level=INFO
 #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
 #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
 #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+log4j.logger.org.apache.hadoop.metrics2=${hadoop.metrics.log.level}
 
 # Jets3t library
 log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
@@ -91,4 +117,15 @@ log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
 # Event Counter Appender
 # Sends counts of logging messages at different severity levels to Hadoop Metrics.
 #
-log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
+log4j.appender.EventCounter=org.apache.hadoop.log.EventCounter
+
+#
+# Job Summary Appender
+#
+log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.JSA.DatePattern=.yyyy-MM-dd
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false

+ 49 - 0
conf/mapred-queue-acls.xml

@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is a template file for queue acls configuration properties -->
+
+<configuration>
+
+<property>
+  <name>mapred.queue.default.acl-submit-job</name>
+  <value> </value>
+  <description> Comma separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2. 
+    If set to the special value '*', it means all users are allowed to 
+    submit jobs. If set to ' '(i.e. space), no user will be allowed to submit
+    jobs.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can submit jobs.
+  </description>
+</property>
+
+<property>
+  <name>mapred.queue.default.acl-administer-jobs</name>
+  <value> </value>
+  <description> Comma separated list of user and group names that are allowed
+    to view job details, kill jobs or modify job's priority for all the jobs
+    in the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2. 
+    If set to the special value '*', it means all users are allowed to do 
+    this operation. If set to ' '(i.e. space), no user will be allowed to do
+    this operation.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can do the above operations on all the jobs
+    in all the queues. The job owner can do all the above operations on his/her
+    job irrespective of this ACL configuration.
+  </description>
+</property>
+
+</configuration>
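A hedged example (not part of the patch) of these two ACLs filled in with hypothetical user and group names; as noted in the descriptions, they only take effect when mapred.acls.enabled is set to true:

  <!-- hypothetical submitters: users alice,bob and groups analysts,engineers -->
  <property>
    <name>mapred.queue.default.acl-submit-job</name>
    <value>alice,bob analysts,engineers</value>
  </property>
  <!-- hypothetical administrators: user opsuser and group hadoopadmins -->
  <property>
    <name>mapred.queue.default.acl-administer-jobs</name>
    <value>opsuser hadoopadmins</value>
  </property>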

+ 49 - 0
conf/mapred-queue-acls.xml.template

@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is a template file for queue acls configuration properties -->
+
+<configuration>
+
+<property>
+  <name>mapred.queue.default.acl-submit-job</name>
+  <value> </value>
+  <description> Comma separated list of user and group names that are allowed
+    to submit jobs to the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2. 
+    If set to the special value '*', it means all users are allowed to 
+    submit jobs. If set to ' '(i.e. space), no user will be allowed to submit
+    jobs.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can submit jobs.
+  </description>
+</property>
+
+<property>
+  <name>mapred.queue.default.acl-administer-jobs</name>
+  <value> </value>
+  <description> Comma separated list of user and group names that are allowed
+    to view job details, kill jobs or modify job's priority for all the jobs
+    in the 'default' queue. The user list and the group list
+    are separated by a blank, e.g. user1,user2 group1,group2. 
+    If set to the special value '*', it means all users are allowed to do 
+    this operation. If set to ' '(i.e. space), no user will be allowed to do
+    this operation.
+
+    It is only used if authorization is enabled in Map/Reduce by setting the
+    configuration property mapred.acls.enabled to true.
+
+    Irrespective of this ACL configuration, the user who started the cluster and
+    cluster administrators configured via
+    mapreduce.cluster.administrators can do the above operations on all the jobs
+    in all the queues. The job owner can do all the above operations on his/her
+    job irrespective of this ACL configuration.
+  </description>
+</property>
+
+</configuration>

+ 4 - 0
conf/taskcontroller.cfg

@@ -0,0 +1,4 @@
+mapred.local.dir=#configured value of mapred.local.dir. It can be a list of comma separated paths.
+hadoop.log.dir=#configured value of hadoop.log.dir.
+mapred.tasktracker.tasks.sleeptime-before-sigkill=#sleep time, in seconds, before SIGKILL is sent to the process group after SIGTERM is sent.
+mapreduce.tasktracker.group=#configured value of mapreduce.tasktracker.group.

+ 42 - 7
ivy.xml

@@ -126,6 +126,16 @@
       rev="${commons-net.version}"
       conf="ftp->default"/>
 
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
+
     <!--Configuration: Jetty -->
 
 <!-- <dependency org="javax.servlet"
@@ -135,7 +145,7 @@
     <dependency org="org.mortbay.jetty"
       name="jetty"
       rev="${jetty.version}"
-      conf="jetty->master"/>
+      conf="jetty->default"/>
     <dependency org="org.mortbay.jetty"
       name="jetty-util"
       rev="${jetty-util.version}"
@@ -159,7 +169,18 @@
       rev="${commons-el.version}"
       conf="jetty->master"/>
 
+    <!--Configuration: secure datanode -->
+    <dependency org="commons-daemon" 
+      name="commons-daemon" 
+      rev="${commons-daemon.version}"
+      conf="server->default" /> 
 
+    <dependency org="com.jcraft"
+      name="jsch"
+      rev="${jsch.version}"
+      conf="common->master">
+    </dependency>
+    
     <!--Configuration: commons-logging -->
 
     <!--it is essential that only the master JAR of commons logging
@@ -191,10 +212,6 @@
       name="commons-net"
       rev="${commons-net.version}"
       conf="s3-client->master"/> 
-    <dependency org="org.mortbay.jetty"
-      name="servlet-api-2.5"
-      rev="${servlet-api-2.5.version}"
-      conf="s3-client->master"/>
 
     <!--Configuration: kfs -->
 
@@ -256,6 +273,24 @@
       rev="${slf4j-log4j12.version}"
       conf="common->master">
     </dependency>
-    </dependencies>
-  
+    <dependency org="org.codehaus.jackson"
+      name="jackson-mapper-asl"
+      rev="1.0.1"
+      conf="common->default"/>
+   <dependency org="org.mockito" 
+       name="mockito-all" 
+       rev="${mockito-all.version}" 
+       conf="common->default">
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjrt"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjtools"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+ </dependencies>
 </ivy-module>

+ 127 - 0
ivy/hadoop-core-pom-template.xml

@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-core</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+   <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.2</version>
+    </dependency>
+   <dependency>
+      <groupId>xmlenc</groupId>
+      <artifactId>xmlenc</artifactId>
+      <version>0.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <version>3.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math</artifactId>
+      <version>2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-configuration</groupId>
+      <artifactId>commons-configuration</artifactId>
+      <version>1.6</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.26</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>6.1.26</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-runtime</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-compiler</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-el</groupId>
+      <artifactId>commons-el</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.java.dev.jets3t</groupId>
+      <artifactId>jets3t</artifactId>
+      <version>0.7.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.kosmosfs</groupId>
+      <artifactId>kfs</artifactId>
+      <version>0.3</version>
+    </dependency>
+    <dependency>
+      <groupId>hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>1.8.0.10</version>
+    </dependency>
+    <dependency>
+      <groupId>oro</groupId>
+      <artifactId>oro</artifactId>
+      <version>2.0.8</version>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jdt</groupId>
+      <artifactId>core</artifactId>
+      <version>3.1.1</version>
+    </dependency>
+  </dependencies>
+</project>
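Once this POM is published (see the mvn-install and mvn-deploy targets in build.xml), downstream projects can depend on the artifact directly. A hedged consumer-side sketch (not part of the patch); the version is a placeholder for whatever @version expands to at build time:

  <!-- version is a placeholder; use the published hadoop-core version -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>0.20.x</version>
  </dependency>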

+ 7 - 1
ivy/hadoop-core.pom

@@ -132,7 +132,7 @@
     <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
-      <version>1.3</version>
+      <version>1.4</version>
       <scope>optional</scope>
     </dependency>
 
@@ -253,5 +253,11 @@
       <version>0.52</version>
       <scope>optional</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.directory.daemon</groupId>
+      <artifactId>daemon-plugin</artifactId>
+      <version>${commons-daemon.version}</version>
+    </dependency>
+
   </dependencies>
 </project>

+ 34 - 0
ivy/hadoop-examples-pom-template.xml

@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-examples</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>@version</version>
+    </dependency>
+  </dependencies>
+</project>

+ 34 - 0
ivy/hadoop-streaming-pom-template.xml

@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-streaming</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+      <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>@version</version>
+      </dependency>
+  </dependencies>
+</project>

+ 53 - 0
ivy/hadoop-test-pom-template.xml

@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-test</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftplet-api</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.mina</groupId>
+      <artifactId>mina-core</artifactId>
+      <version>2.0.0-M5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftpserver-core</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftpserver-deprecated</artifactId>
+      <version>1.0.0-M2</version>
+    </dependency>
+  </dependencies>
+</project>

+ 34 - 0
ivy/hadoop-tools-pom-template.xml

@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-tools</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-core</artifactId>
+      <version>@version</version>
+    </dependency>
+  </dependencies>
+</project>

+ 9 - 0
ivy/ivysettings.xml

@@ -32,6 +32,9 @@
   <property name="repo.maven.org"
     value="http://repo1.maven.org/maven2/"
     override="false"/>
+  <property name="oss.sonatype.org"
+    value="https://oss.sonatype.org/content/groups/public/"
+    override="false"/>
   <property name="snapshot.apache.org"
     value="http://people.apache.org/repo/m2-snapshot-repository/"
     override="false"/>
@@ -53,9 +56,15 @@
       pattern="${maven2.pattern.ext}"
       m2compatible="true"
       />
+    <ibiblio name="oss-sonatype"
+      root="${oss.sonatype.org}"
+      pattern="${maven2.pattern.ext}"
+      m2compatible="true"
+      />
     <chain name="default" dual="true">
       <resolver ref="local"/>
       <resolver ref="maven2"/>
+      <resolver ref="oss-sonatype"/>
     </chain>
     <chain name="internal">
       <resolver ref="local"/>

+ 18 - 7
ivy/libraries.properties

@@ -15,19 +15,27 @@
 
 # This is the version of hadoop we are generating
 hadoop.version=0.20.0
+hadoop-gpl-compression.version=0.1.0
 
 #These are the versions of our dependencies (in alphabetical order)
 apacheant.version=1.7.0
+ant-task.version=2.0.10
+
+aspectj.version=1.6.5
 
 checkstyle.version=4.2
 
+jsch.version=0.1.42
 commons-cli.version=1.2
-commons-codec.version=1.3
+commons-codec.version=1.4
 commons-collections.version=3.1
+commons-configuration.version=1.6
+commons-daemon.version=1.0.1
 commons-httpclient.version=3.0.1
 commons-lang.version=2.4
 commons-logging.version=1.0.4
 commons-logging-api.version=1.0.4
+commons-math.version=2.1
 commons-el.version=1.0
 commons-fileupload.version=1.2
 commons-io.version=1.4
@@ -37,18 +45,19 @@ coreplugin.version=1.3.2
 
 hsqldb.version=1.8.0.10
 
-#ivy.version=2.0.0-beta2
-ivy.version=2.0.0-rc2
+ivy.version=2.1.0
 
 jasper.version=5.5.12
 #not able to figure out the version of jsp & jsp-api to get it resolved through ivy
 # but still declared here as we are going to have a local copy from the lib folder
 jsp.version=2.1
 jsp-api.version=5.5.12
+jsp-api-2.1.version=6.1.14
+jsp-2.1.version=6.1.14
 jets3t.version=0.6.1
-jetty.version=6.1.14
-jetty-util.version=6.1.14
-junit.version=3.8.1
+jetty.version=6.1.26
+jetty-util.version=6.1.26
+junit.version=4.5
 jdiff.version=1.0.9
 json.version=1.0
 
@@ -57,15 +66,17 @@ kfs.version=0.1
 log4j.version=1.2.15
 lucene-core.version=2.3.1
 
+mockito-all.version=1.8.5
+
 oro.version=2.0.8
 
 rats-lib.version=0.5.1
 
 servlet.version=4.0.6
-servlet-api-2.5.version=6.1.14
 servlet-api.version=2.5
 slf4j-api.version=1.4.3
 slf4j-log4j12.version=1.4.3
 
+wagon-http.version=1.0-beta-2
 xmlenc.version=0.52
 xerces.version=1.4.4

File diff too large to display
+ 11 - 0
lib/jdiff/hadoop_0.20.1.xml


File diff too large to display
+ 11 - 0
lib/jdiff/hadoop_0.20.100.xml


+ 0 - 0
src/c++/libhdfs/autom4te.cache/output.0t


+ 0 - 0
src/c++/libhdfs/autom4te.cache/requests


+ 8 - 0
src/c++/libhdfs/autom4te.cache/traces.0t

@@ -0,0 +1,8 @@
+m4trace:aclocal.m4:1021: -1- m4_include([m4/apfunctions.m4])
+m4trace:aclocal.m4:1022: -1- m4_include([m4/apjava.m4])
+m4trace:aclocal.m4:1023: -1- m4_include([m4/apsupport.m4])
+m4trace:aclocal.m4:1024: -1- m4_include([m4/libtool.m4])
+m4trace:aclocal.m4:1025: -1- m4_include([m4/ltoptions.m4])
+m4trace:aclocal.m4:1026: -1- m4_include([m4/ltsugar.m4])
+m4trace:aclocal.m4:1027: -1- m4_include([m4/ltversion.m4])
+m4trace:aclocal.m4:1028: -1- m4_include([m4/lt~obsolete.m4])

+ 67 - 116
src/c++/libhdfs/hdfs.c

@@ -31,8 +31,6 @@
 #define HADOOP_OSTRM    "org/apache/hadoop/fs/FSDataOutputStream"
 #define HADOOP_STAT     "org/apache/hadoop/fs/FileStatus"
 #define HADOOP_FSPERM   "org/apache/hadoop/fs/permission/FsPermission"
-#define HADOOP_UNIX_USER_GROUP_INFO "org/apache/hadoop/security/UnixUserGroupInformation"
-#define HADOOP_USER_GROUP_INFO "org/apache/hadoop/security/UserGroupInformation"
 #define JAVA_NET_ISA    "java/net/InetSocketAddress"
 #define JAVA_NET_URI    "java/net/URI"
 #define JAVA_STRING     "java/lang/String"
@@ -168,12 +166,12 @@ done:
 
 
 hdfsFS hdfsConnect(const char* host, tPort port) {
-  // conect with NULL as user name/groups
-  return hdfsConnectAsUser(host, port, NULL, NULL, 0);
+  // connect with NULL as user name
+  return hdfsConnectAsUser(host, port, NULL);
 }
 
 
-hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const char **groups, int groups_size )
+hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user)
 {
     // JAVA EQUIVALENT:
     //  FileSystem fs = FileSystem.get(new Configuration());
@@ -188,6 +186,7 @@ hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const
     jthrowable jExc = NULL;
     char    *cURI = 0;
     jobject gFsRef = NULL;
+    jstring jUserString = NULL;
 
 
     //Get the JNIEnv* corresponding to current thread
@@ -209,81 +208,8 @@ hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const
     }
  
     if (user != NULL) {
-
-      if (groups == NULL || groups_size <= 0) {
-        fprintf(stderr, "ERROR: groups must not be empty/null\n");
-        errno = EINVAL;
-        return NULL;
-      }
-
-      jstring jUserString = (*env)->NewStringUTF(env, user);
-      jarray jGroups = constructNewArrayString(env, &jExc, groups, groups_size);
-      if (jGroups == NULL) {
-        errno = EINTERNAL;
-        fprintf(stderr, "ERROR: could not construct groups array\n");
-        return NULL;
-      }
-
-      jobject jUgi;
-      if ((jUgi = constructNewObjectOfClass(env, &jExc, HADOOP_UNIX_USER_GROUP_INFO, JMETHOD2(JPARAM(JAVA_STRING), JARRPARAM(JAVA_STRING), JAVA_VOID), jUserString, jGroups)) == NULL) {
-        fprintf(stderr,"failed to construct hadoop user unix group info object\n");
-        errno = errnoFromException(jExc, env, HADOOP_UNIX_USER_GROUP_INFO,
-                                   "init");
-        destroyLocalReference(env, jConfiguration);
-        destroyLocalReference(env, jUserString);
-        if (jGroups != NULL) {
-          destroyLocalReference(env, jGroups);
-        }          
-        return NULL;
-      }
-#define USE_UUGI
-#ifdef USE_UUGI
-
-      // UnixUserGroupInformation.UGI_PROPERTY_NAME
-      jstring jAttrString = (*env)->NewStringUTF(env,"hadoop.job.ugi");
-      
-      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_UNIX_USER_GROUP_INFO, "saveToConf",
-                       JMETHOD3(JPARAM(HADOOP_CONF), JPARAM(JAVA_STRING), JPARAM(HADOOP_UNIX_USER_GROUP_INFO), JAVA_VOID),
-                       jConfiguration, jAttrString, jUgi) != 0) {
-        errno = errnoFromException(jExc, env, HADOOP_FSPERM,
-                                   "init");
-        destroyLocalReference(env, jConfiguration);
-        destroyLocalReference(env, jUserString);
-        if (jGroups != NULL) {
-          destroyLocalReference(env, jGroups);
-        }          
-        destroyLocalReference(env, jUgi);
-        return NULL;
-      }
-
-      destroyLocalReference(env, jUserString);
-      destroyLocalReference(env, jGroups);
-      destroyLocalReference(env, jUgi);
+      jUserString = (*env)->NewStringUTF(env, user);
     }
-#else
-    
-    // what does "current" mean in the context of libhdfs ? does it mean for the last hdfs connection we used?
-    // that's why this code cannot be activated. We know the above use of the conf object should work well with 
-    // multiple connections.
-      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_USER_GROUP_INFO, "setCurrentUGI",
-                       JMETHOD1(JPARAM(HADOOP_USER_GROUP_INFO), JAVA_VOID),
-                       jUgi) != 0) {
-        errno = errnoFromException(jExc, env, HADOOP_USER_GROUP_INFO,
-                                   "setCurrentUGI");
-        destroyLocalReference(env, jConfiguration);
-        destroyLocalReference(env, jUserString);
-        if (jGroups != NULL) {
-          destroyLocalReference(env, jGroups);
-        }          
-        destroyLocalReference(env, jUgi);
-        return NULL;
-      }
-
-      destroyLocalReference(env, jUserString);
-      destroyLocalReference(env, jGroups);
-      destroyLocalReference(env, jUgi);
-    }
-#endif      
     //Check what type of FileSystem the caller wants...
     if (host == NULL) {
         // fs = FileSytem::getLocal(conf);
@@ -297,43 +223,61 @@ hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const
         }
         jFS = jVal.l;
     }
+    //FileSystem.get(conf) -> FileSystem.get(FileSystem.getDefaultUri(conf), 
+    //                                       conf, user)
     else if (!strcmp(host, "default") && port == 0) {
-        //fs = FileSystem::get(conf); 
-        if (invokeMethod(env, &jVal, &jExc, STATIC, NULL,
-                         HADOOP_FS, "get",
-                         JMETHOD1(JPARAM(HADOOP_CONF),
-                                  JPARAM(HADOOP_FS)),
-                         jConfiguration) != 0) {
-            errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
-                                       "FileSystem::get");
-            goto done;
-        }
-        jFS = jVal.l;
+      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_FS,
+                      "getDefaultUri", 
+                      "(Lorg/apache/hadoop/conf/Configuration;)Ljava/net/URI;",
+                      jConfiguration) != 0) {
+        errno = errnoFromException(jExc, env, "org.apache.hadoop.fs.", 
+                                   "FileSystem::getDefaultUri");
+        goto done;
+      }
+      jURI = jVal.l;
+      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_FS, "get",
+                       JMETHOD3(JPARAM(JAVA_NET_URI),
+                                JPARAM(HADOOP_CONF), JPARAM(JAVA_STRING), 
+                                JPARAM(HADOOP_FS)),
+                       jURI, jConfiguration, jUserString) != 0) {
+        errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
+                                   "Filesystem::get(URI, Configuration)");
+        goto done;
+      }
+
+      jFS = jVal.l;
     }
     else {
-        // fs = FileSystem::get(URI, conf);
-        cURI = malloc(strlen(host)+16);
-        sprintf(cURI, "hdfs://%s:%d", host, (int)(port));
-
-        jURIString = (*env)->NewStringUTF(env, cURI);
-        if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, JAVA_NET_URI,
-                         "create", "(Ljava/lang/String;)Ljava/net/URI;",
-                         jURIString) != 0) {
-            errno = errnoFromException(jExc, env, "java.net.URI::create");
-            goto done;
-        }
-        jURI = jVal.l;
+      // fs = FileSystem::get(URI, conf, ugi);
+      cURI = malloc(strlen(host)+16);
+      if (cURI == NULL) {
+        fprintf(stderr, "Couldn't allocate an object of size %d",
+                (int)(strlen(host) + 16));
+        errno = EINTERNAL;
+        goto done;
+      }
+      sprintf(cURI, "hdfs://%s:%d", host, (int)(port));
 
-        if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_FS, "get",
-                         JMETHOD2(JPARAM(JAVA_NET_URI),
-                                  JPARAM(HADOOP_CONF), JPARAM(HADOOP_FS)),
-                         jURI, jConfiguration) != 0) {
-            errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
-                                       "Filesystem::get(URI, Configuration)");
-            goto done;
-        }
+      jURIString = (*env)->NewStringUTF(env, cURI);
+      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, JAVA_NET_URI,
+                       "create", "(Ljava/lang/String;)Ljava/net/URI;",
+                       jURIString) != 0) {
+        errno = errnoFromException(jExc, env, "java.net.URI::create");
+        goto done;
+      }
+      jURI = jVal.l;
 
-        jFS = jVal.l;
+      if (invokeMethod(env, &jVal, &jExc, STATIC, NULL, HADOOP_FS, "get",
+                       JMETHOD3(JPARAM(JAVA_NET_URI),
+                                JPARAM(HADOOP_CONF), JPARAM(JAVA_STRING),
+                                JPARAM(HADOOP_FS)),
+                       jURI, jConfiguration, jUserString) != 0) {
+        errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
+                                   "Filesystem::get(URI, Configuration)");
+        goto done;
+      }
+
+      jFS = jVal.l;
     }
 
   done:
@@ -342,6 +286,7 @@ hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const
     destroyLocalReference(env, jConfiguration);
     destroyLocalReference(env, jURIString);
     destroyLocalReference(env, jURI);
+    destroyLocalReference(env, jUserString);
 
     if (cURI) free(cURI);
 
@@ -539,12 +484,12 @@ hdfsFile hdfsOpenFile(hdfsFS fs, const char* path, int flags,
     file = malloc(sizeof(struct hdfsFile_internal));
     if (!file) {
         errno = ENOMEM;
-        return NULL;
-    }
-    file->file = (*env)->NewGlobalRef(env, jVal.l);
-    file->type = (((flags & O_WRONLY) == 0) ? INPUT : OUTPUT);
+    } else {
+        file->file = (*env)->NewGlobalRef(env, jVal.l);
+        file->type = (((flags & O_WRONLY) == 0) ? INPUT : OUTPUT);
 
-    destroyLocalReference(env, jVal.l);
+        destroyLocalReference(env, jVal.l);
+    }
 
     done:
 
@@ -626,9 +571,11 @@ int hdfsExists(hdfsFS fs, const char *path)
                      jPath) != 0) {
         errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
                                    "FileSystem::exists");
+        destroyLocalReference(env, jPath);
         return -1;
     }
 
+    destroyLocalReference(env, jPath);
     return jVal.z ? 0 : -1;
 }
 
@@ -1144,6 +1091,7 @@ int hdfsDelete(hdfsFS fs, const char* path)
                      jPath) != 0) {
         errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
                                    "FileSystem::delete");
+        destroyLocalReference(env, jPath);
         return -1;
     }
 
@@ -1194,6 +1142,8 @@ int hdfsRename(hdfsFS fs, const char* oldPath, const char* newPath)
                      jOldPath, jNewPath) != 0) {
         errno = errnoFromException(jExc, env, "org.apache.hadoop.fs."
                                    "FileSystem::rename");
+        destroyLocalReference(env, jOldPath);
+        destroyLocalReference(env, jNewPath);
         return -1;
     }
 
@@ -1457,6 +1407,7 @@ int hdfsChmod(hdfsFS fs, const char* path, short mode)
     //Create an object of org.apache.hadoop.fs.Path
     jobject jPath = constructNewObjectOfPath(env, path);
     if (jPath == NULL) {
+      destroyLocalReference(env, jPermObj);
       return -3;
     }
 

+ 1 - 2
src/c++/libhdfs/hdfs.h

@@ -101,10 +101,9 @@ extern  "C" {
      * (core-site/core-default.xml).
      * @param port The port on which the server is listening.
     * @param user the user name (this is hadoop domain user). Or NULL is equivalent to hdfsConnect(host, port)
-     * @param groups the groups (these are hadoop domain groups)
      * @return Returns a handle to the filesystem or NULL on error.
      */
-     hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user , const char *groups[], int groups_size );
+     hdfsFS hdfsConnectAsUser(const char* host, tPort port, const char *user);
 
 
     /** 
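The header change above drops the groups array: callers now pass only a user name, and group membership is resolved on the server side. A minimal caller-side sketch of the new three-argument signature (not part of the patch); it assumes the usual libhdfs environment (Hadoop jars on the CLASSPATH, linking against libhdfs and a JVM), and the path below is hypothetical.

```c
/* Minimal sketch of the new three-argument hdfsConnectAsUser(); assumes the
 * usual libhdfs setup (CLASSPATH with the Hadoop jars, -lhdfs -ljvm at link
 * time). "/tmp/example" is a hypothetical path. */
#include <errno.h>
#include <stdio.h>
#include "hdfs.h"

int main(void) {
  /* Connect to the default FS from core-site.xml as user "nobody";
   * passing NULL for user behaves like plain hdfsConnect(). */
  hdfsFS fs = hdfsConnectAsUser("default", 0, "nobody");
  if (fs == NULL) {
    fprintf(stderr, "hdfsConnectAsUser failed (errno=%d)\n", errno);
    return 1;
  }
  if (hdfsExists(fs, "/tmp/example") == 0) {   /* 0 means the path exists */
    printf("/tmp/example exists\n");
  }
  hdfsDisconnect(fs);
  return 0;
}
```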

+ 1 - 0
src/c++/libhdfs/hdfsJniHelper.c

@@ -242,6 +242,7 @@ jarray constructNewArrayString(JNIEnv *env, Exc *exc, const char **elements, int
       fprintf(stderr, "ERROR: jelem == NULL\n");
     }
     (*env)->SetObjectArrayElement(env, result, i, jelem);
+    (*env)->DeleteLocalRef(env, jelem);
   }
   return result;
 }
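The single added line above releases the per-element local reference once it has been stored in the array; without it, a long input array can exhaust the JNI local-reference table. A generic sketch of the same pattern (plain JNI; the function and variable names are illustrative, not the helper above).

```c
/* Generic JNI pattern behind the fix above: create a local reference per
 * iteration, hand it to the array, then delete it immediately so a large
 * 'len' cannot exhaust the local-reference table. */
#include <jni.h>

jobjectArray make_string_array(JNIEnv *env, const char **elements, int len) {
  jclass str_cls = (*env)->FindClass(env, "java/lang/String");
  if (str_cls == NULL) {
    return NULL;
  }
  jobjectArray result = (*env)->NewObjectArray(env, len, str_cls, NULL);
  int i;
  for (i = 0; i < len && result != NULL; i++) {
    jstring jelem = (*env)->NewStringUTF(env, elements[i]);
    (*env)->SetObjectArrayElement(env, result, i, jelem);
    (*env)->DeleteLocalRef(env, jelem);   /* mirrors the line added above */
  }
  (*env)->DeleteLocalRef(env, str_cls);
  return result;
}
```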

+ 1 - 4
src/c++/libhdfs/hdfs_test.c

@@ -397,11 +397,8 @@ int main(int argc, char **argv) {
 
       const char *tuser = "nobody";
       const char* writePath = "/tmp/usertestfile.txt";
-      const char **groups =  (const char**)malloc(sizeof(char*)* 2);
-      groups[0] = "users";
-      groups[1] = "nobody";
 
-      fs = hdfsConnectAsUser("default", 0, tuser, groups, 2);
+      fs = hdfsConnectAsUser("default", 0, tuser);
       if(!fs) {
         fprintf(stderr, "Oops! Failed to connect to hdfs as user %s!\n",tuser);
         exit(-1);

+ 10 - 12
src/c++/pipes/Makefile.in

@@ -1,8 +1,8 @@
-# Makefile.in generated by automake 1.9 from Makefile.am.
+# Makefile.in generated by automake 1.9.6 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004  Free Software Foundation, Inc.
+# 2003, 2004, 2005  Free Software Foundation, Inc.
 # This Makefile.in is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
@@ -15,8 +15,6 @@
 @SET_MAKE@
 
 
-SOURCES = $(libhadooppipes_a_SOURCES)
-
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 VPATH = @srcdir@
@@ -37,13 +35,12 @@ POST_INSTALL = :
 NORMAL_UNINSTALL = :
 PRE_UNINSTALL = :
 POST_UNINSTALL = :
+build_triplet = @build@
 host_triplet = @host@
-DIST_COMMON = config.guess config.guess config.sub config.sub \
-	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
-	$(top_srcdir)/configure $(am__configure_deps) \
-	$(top_srcdir)/impl/config.h.in depcomp depcomp ltmain.sh \
-	ltmain.sh config.guess config.guess config.sub config.sub \
-	$(api_HEADERS)
+DIST_COMMON = config.guess config.sub $(srcdir)/Makefile.in \
+	$(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(top_srcdir)/impl/config.h.in depcomp \
+	ltmain.sh config.guess config.sub $(api_HEADERS)
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../utils/m4/hadoop_utils.m4 \
@@ -75,11 +72,11 @@ depcomp = $(SHELL) $(top_srcdir)/depcomp
 am__depfiles_maybe = depfiles
 CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
 	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
-LTCXXCOMPILE = $(LIBTOOL) --mode=compile --tag=CXX $(CXX) $(DEFS) \
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) \
 	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
 	$(AM_CXXFLAGS) $(CXXFLAGS)
 CXXLD = $(CXX)
-CXXLINK = $(LIBTOOL) --mode=link --tag=CXX $(CXXLD) $(AM_CXXFLAGS) \
+CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \
 	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
 SOURCES = $(libhadooppipes_a_SOURCES)
 apiHEADERS_INSTALL = $(INSTALL_HEADER)
@@ -136,6 +133,7 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
 PACKAGE_VERSION = @PACKAGE_VERSION@
 PATH_SEPARATOR = @PATH_SEPARATOR@
 RANLIB = @RANLIB@
+SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 STRIP = @STRIP@

File diff too large to display
+ 306 - 162
src/c++/pipes/aclocal.m4


File diff too large to display
+ 416 - 193
src/c++/pipes/configure


+ 106 - 3
src/c++/pipes/impl/HadoopPipes.cc

@@ -31,6 +31,11 @@
 #include <strings.h>
 #include <sys/socket.h>
 #include <pthread.h>
+#include <iostream>
+#include <fstream>
+
+#include <openssl/hmac.h>
+#include <openssl/buffer.h>
 
 using std::map;
 using std::string;
@@ -289,9 +294,9 @@ namespace HadoopPipes {
 
   enum MESSAGE_TYPE {START_MESSAGE, SET_JOB_CONF, SET_INPUT_TYPES, RUN_MAP, 
                      MAP_ITEM, RUN_REDUCE, REDUCE_KEY, REDUCE_VALUE, 
-                     CLOSE, ABORT, 
+                     CLOSE, ABORT, AUTHENTICATION_REQ,
                      OUTPUT=50, PARTITIONED_OUTPUT, STATUS, PROGRESS, DONE,
-                     REGISTER_COUNTER, INCREMENT_COUNTER};
+                     REGISTER_COUNTER, INCREMENT_COUNTER, AUTHENTICATION_RESP};
 
   class BinaryUpwardProtocol: public UpwardProtocol {
   private:
@@ -302,6 +307,12 @@ namespace HadoopPipes {
       HADOOP_ASSERT(stream->open(_stream), "problem opening stream");
     }
 
+    virtual void authenticate(const string &responseDigest) {
+      serializeInt(AUTHENTICATION_RESP, *stream);
+      serializeString(responseDigest, *stream);
+      stream->flush();
+    }
+
     virtual void output(const string& key, const string& value) {
       serializeInt(OUTPUT, *stream);
       serializeString(key, *stream);
@@ -358,6 +369,82 @@ namespace HadoopPipes {
     BinaryUpwardProtocol * uplink;
     string key;
     string value;
+    string password;
+    bool authDone;
+    void getPassword(string &password) {
+      const char *passwordFile = getenv("hadoop.pipes.shared.secret.location");
+      if (passwordFile == NULL) {
+        return;
+      }
+      std::ifstream fstr(passwordFile, std::fstream::binary);
+      if (fstr.fail()) {
+        std::cerr << "Could not open the password file" << std::endl;
+        return;
+      } 
+      unsigned char * passBuff = new unsigned char [513]; // room for the trailing NUL
+      fstr.read((char *)passBuff, 512);
+      int passwordLength = fstr.gcount();
+      fstr.close();
+      passBuff[passwordLength] = 0;
+      password.replace(0, passwordLength, (const char *) passBuff, passwordLength);
+      delete [] passBuff;
+      return; 
+    }
+
+    void verifyDigestAndRespond(string& digest, string& challenge) {
+      if (password.empty()) {
+        //password can be empty if process is running in debug mode from
+        //command file.
+        authDone = true;
+        return;
+      }
+
+      if (!verifyDigest(password, digest, challenge)) {
+        std::cerr << "Server failed to authenticate. Exiting" << std::endl;
+        exit(-1);
+      }
+      authDone = true;
+      string responseDigest = createDigest(password, digest);
+      uplink->authenticate(responseDigest);
+    }
+
+    bool verifyDigest(string &password, string& digest, string& challenge) {
+      string expectedDigest = createDigest(password, challenge);
+      if (digest == expectedDigest) {
+        return true;
+      } else {
+        return false;
+      }
+    }
+
+    string createDigest(string &password, string& msg) {
+      HMAC_CTX ctx;
+      unsigned char digest[EVP_MAX_MD_SIZE];
+      HMAC_Init(&ctx, (const unsigned char *)password.c_str(), 
+          password.length(), EVP_sha1());
+      HMAC_Update(&ctx, (const unsigned char *)msg.c_str(), msg.length());
+      unsigned int digestLen;
+      HMAC_Final(&ctx, digest, &digestLen);
+      HMAC_cleanup(&ctx);
+
+      //now apply base64 encoding
+      BIO *bmem, *b64;
+      BUF_MEM *bptr;
+
+      b64 = BIO_new(BIO_f_base64());
+      bmem = BIO_new(BIO_s_mem());
+      b64 = BIO_push(b64, bmem);
+      BIO_write(b64, digest, digestLen);
+      BIO_flush(b64);
+      BIO_get_mem_ptr(b64, &bptr);
+
+      char digestBuffer[bptr->length];
+      memcpy(digestBuffer, bptr->data, bptr->length-1);
+      digestBuffer[bptr->length-1] = 0;
+      BIO_free_all(b64);
+
+      return string(digestBuffer);
+    }
 
   public:
     BinaryProtocol(FILE* down, DownwardProtocol* _handler, FILE* up) {
@@ -365,6 +452,8 @@ namespace HadoopPipes {
       downStream->open(down);
       uplink = new BinaryUpwardProtocol(up);
       handler = _handler;
+      authDone = false;
+      getPassword(password);
     }
 
     UpwardProtocol* getUplink() {
@@ -374,7 +463,22 @@ namespace HadoopPipes {
     virtual void nextEvent() {
       int32_t cmd;
       cmd = deserializeInt(*downStream);
+      if (!authDone && cmd != AUTHENTICATION_REQ) {
+        //Authentication request must be the first message if
+        //authentication is not complete
+        std::cerr << "Command:" << cmd << "received before authentication. " 
+            << "Exiting.." << std::endl;
+        exit(-1);
+      }
       switch (cmd) {
+      case AUTHENTICATION_REQ: {
+        string digest;
+        string challenge;
+        deserializeString(digest, *downStream);
+        deserializeString(challenge, *downStream);
+        verifyDigestAndRespond(digest, challenge);
+        break;
+      }
       case START_MESSAGE: {
         int32_t prot;
         prot = deserializeInt(*downStream);
@@ -1021,7 +1125,6 @@ namespace HadoopPipes {
         setbuf = setvbuf(outStream, bufout, _IOFBF, bufsize);
         HADOOP_ASSERT(setbuf == 0, string("problem with setvbuf for outStream: ")
                                      + strerror(errno));
-
         connection = new BinaryProtocol(stream, context, outStream);
       } else if (getenv("hadoop.pipes.command.file")) {
         char* filename = getenv("hadoop.pipes.command.file");
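With this change the pipes child refuses to process any command before AUTHENTICATION_REQ, and answers it with base64(HMAC-SHA1(shared_secret, challenge)), where the secret comes from the file named by hadoop.pipes.shared.secret.location. Below is a standalone sketch of that digest computation, an illustration of the scheme rather than the class above, using OpenSSL's one-shot HMAC() and EVP_EncodeBlock(); the input strings and buffer size are illustrative.

```c
/* Sketch of the pipes challenge/response digest used above:
 * base64( HMAC-SHA1(shared_secret, challenge) ). Inputs are illustrative. */
#include <stdio.h>
#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

static int create_digest(const char *secret, const char *challenge,
                         char *out, size_t out_len) {
  unsigned char mac[EVP_MAX_MD_SIZE];
  unsigned int mac_len = 0;
  if (HMAC(EVP_sha1(), secret, (int)strlen(secret),
           (const unsigned char *)challenge, strlen(challenge),
           mac, &mac_len) == NULL) {
    return -1;
  }
  /* base64 needs 4*ceil(n/3) characters plus a terminating NUL */
  if (out_len < 4 * ((mac_len + 2) / 3) + 1) {
    return -1;
  }
  EVP_EncodeBlock((unsigned char *)out, mac, (int)mac_len);
  return 0;
}

int main(void) {
  char digest[64];
  if (create_digest("shared-secret", "server-challenge",
                    digest, sizeof(digest)) == 0) {
    /* this string is what goes back in the AUTHENTICATION_RESP message */
    printf("response digest: %s\n", digest);
  }
  return 0;
}
```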

+ 18 - 20
src/native/lib/Makefile.am → src/c++/task-controller/.autom4te.cfg

@@ -17,28 +17,26 @@
 #
 
 #
-# Makefile template for building libhadoop.so 
+# autom4te configuration for the task-controller
 #
 
-#
-# Notes: 
-# 1. This makefile is designed to do the actual builds in $(HADOOP_HOME)/build/native/${os.name}-${os.arch}/lib 
-# 2. This makefile depends on the following environment variables to function correctly:
-#    * HADOOP_NATIVE_SRCDIR 
-#    * JAVA_HOME
-#    * OS_ARCH 
-#    All these are setup by build.xml and/or the top-level makefile.
-#
+begin-language: "Autoheader-preselections"
+args: --no-cache 
+end-language: "Autoheader-preselections"
 
-# Add .lo files in $(SUBDIRS) to construct libhadoop.so
-HADOOP_OBJS = $(foreach path,$(addprefix ../,$(SUBDIRS)),$(wildcard $(path)/*.lo))
-AM_LDFLAGS = @JNI_LDFLAGS@ -m$(JVM_DATA_MODEL)
+begin-language: "Automake-preselections"
+args: --no-cache 
+end-language: "Automake-preselections"
 
-lib_LTLIBRARIES = libhadoop.la
-libhadoop_la_SOURCES = 
-libhadoop_la_LDFLAGS = -version-info 1:0:0
-libhadoop_la_LIBADD = $(HADOOP_OBJS) -ldl -ljvm
+begin-language: "Autoreconf-preselections"
+args: --no-cache 
+end-language: "Autoreconf-preselections"
+
+begin-language: "Autoconf-without-aclocal-m4"
+args: --no-cache 
+end-language: "Autoconf-without-aclocal-m4"
+
+begin-language: "Autoconf"
+args: --no-cache 
+end-language: "Autoconf"
 
-#
-#vim: sw=4: ts=4: noet
-#

+ 13 - 0
src/c++/task-controller/.gitignore

@@ -0,0 +1,13 @@
+Makefile
+install-sh
+aclocal.m4
+compile
+config.guess
+config.sub
+configure
+depcomp
+install-sh
+ltmain.sh
+Makefile.in
+missing
+stamp-h1

+ 32 - 0
src/c++/task-controller/Makefile.am

@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AM_CFLAGS=-I$(srcdir)/impl -Wall -g -Werror
+
+# Define the programs that need to be built
+bin_PROGRAMS = task-controller
+check_PROGRAMS = test-task-controller
+
+TESTS = test-task-controller
+
+# Define the sources for the common files
+common_SOURCES = impl/configuration.c impl/task-controller.c
+
+# Define the sources for the real executable
+task_controller_SOURCES = $(common_SOURCES) impl/main.c
+
+# Define the sources for the test executable
+test_task_controller_SOURCES = $(common_SOURCES) test/test-task-controller.c

+ 55 - 0
src/c++/task-controller/configure.ac

@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.59)
+AC_INIT(linux-task-controller, 1.0.0, mapreduce-dev@hadoop.apache.org)
+AC_GNU_SOURCE
+AC_SYS_LARGEFILE
+
+AM_INIT_AUTOMAKE([subdir-objects foreign no-dist])
+
+AC_CONFIG_SRCDIR([impl/task-controller.c])
+AC_CONFIG_FILES([Makefile])
+
+AC_PREFIX_DEFAULT(`pwd`/../install)
+
+CHECK_INSTALL_CFLAG
+HADOOP_UTILS_SETUP
+
+# Checks for programs.
+AC_PROG_CC
+AM_PROG_CC_C_O
+AC_PROG_LIBTOOL
+
+# Checks for libraries.
+
+# Checks for header files.
+AC_LANG(C)
+AC_CHECK_HEADERS([unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_TYPE_OFF_T
+AC_TYPE_SIZE_T
+AC_FUNC_STRERROR_R
+
+# Checks for library functions.
+AC_CHECK_FUNCS([mkdir uname])
+AC_OUTPUT

+ 297 - 0
src/c++/task-controller/impl/configuration.c

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// ensure we get the posix version of dirname by including this first
+#include <libgen.h> 
+
+#include "configuration.h"
+#include "task-controller.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define INCREMENT_SIZE 1000
+#define MAX_SIZE 10
+
+struct confentry {
+  const char *key;
+  const char *value;
+};
+
+struct configuration {
+  int size;
+  struct confentry **confdetails;
+};
+
+struct configuration config={.size=0, .confdetails=NULL};
+
+//clean up method for freeing configuration
+void free_configurations() {
+  int i = 0;
+  for (i = 0; i < config.size; i++) {
+    if (config.confdetails[i]->key != NULL) {
+      free((void *)config.confdetails[i]->key);
+    }
+    if (config.confdetails[i]->value != NULL) {
+      free((void *)config.confdetails[i]->value);
+    }
+    free(config.confdetails[i]);
+  }
+  if (config.size > 0) {
+    free(config.confdetails);
+  }
+  config.size = 0;
+}
+
+/**
+ * Is the file/directory only writable by root.
+ * Returns 1 if true
+ */
+static int is_only_root_writable(const char *file) {
+  struct stat file_stat;
+  if (stat(file, &file_stat) != 0) {
+    fprintf(LOGFILE, "Can't stat file %s - %s\n", file, strerror(errno));
+    return 0;
+  }
+  if (file_stat.st_uid != 0) {
+    fprintf(LOGFILE, "File %s must be owned by root, but is owned by %d\n",
+            file, file_stat.st_uid);
+    return 0;
+  }
+  if ((file_stat.st_mode & (S_IWGRP | S_IWOTH)) != 0) {
+    fprintf(LOGFILE, 
+	    "File %s must not be world or group writable, but is %03o\n",
+	    file, file_stat.st_mode & (~S_IFMT));
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Ensure that the configuration file and all of the containing directories
+ * are only writable by root. Otherwise, an attacker can change the 
+ * configuration and potentially cause damage.
+ * returns 0 if permissions are ok
+ */
+int check_configuration_permissions(const char* file_name) {
+  // copy the input so that we can modify it with dirname
+  char* dir = strdup(file_name);
+  char* buffer = dir;
+  do {
+    if (!is_only_root_writable(dir)) {
+      free(buffer);
+      return -1;
+    }
+    dir = dirname(dir);
+  } while (strcmp(dir, "/") != 0);
+  free(buffer);
+  return 0;
+}
+
+//function used to load the configurations present in the secure config
+void read_config(const char* file_name) {
+  fprintf(LOGFILE, "Reading task controller config from %s\n" , file_name);
+  FILE *conf_file;
+  char *line;
+  char *equaltok;
+  char *temp_equaltok;
+  size_t linesize = 1000;
+  int size_read = 0;
+
+  if (file_name == NULL) {
+    fprintf(LOGFILE, "Null configuration filename passed in\n");
+    exit(INVALID_CONFIG_FILE);
+  }
+
+  #ifdef DEBUG
+    fprintf(LOGFILE, "read_config :Conf file name is : %s \n", file_name);
+  #endif
+
+  //allocate space for ten configuration items.
+  config.confdetails = (struct confentry **) malloc(sizeof(struct confentry *)
+      * MAX_SIZE);
+  config.size = 0;
+  conf_file = fopen(file_name, "r");
+  if (conf_file == NULL) {
+    fprintf(LOGFILE, "Invalid conf file provided : %s \n", file_name);
+    exit(INVALID_CONFIG_FILE);
+  }
+  while(!feof(conf_file)) {
+    line = (char *) malloc(linesize);
+    if(line == NULL) {
+      fprintf(LOGFILE, "malloc failed while reading configuration file.\n");
+      exit(OUT_OF_MEMORY);
+    }
+    size_read = getline(&line,&linesize,conf_file);
+    //feof returns true only after we read past EOF,
+    //so a file whose last line lacks a newline can reach this point;
+    //if size_read is negative, check for the EOF condition
+    if (size_read == -1) {
+      if(!feof(conf_file)){
+        fprintf(LOGFILE, "getline returned error.\n");
+        exit(INVALID_CONFIG_FILE);
+      }else {
+        free(line);
+        break;
+      }
+    }
+    //trim the ending new line
+    line[strlen(line)-1] = '\0';
+    //comment line
+    if(line[0] == '#') {
+      free(line);
+      continue;
+    }
+    //tokenize first to get key and list of values.
+    //if no equals is found ignore this line, can be an empty line also
+    equaltok = strtok_r(line, "=", &temp_equaltok);
+    if(equaltok == NULL) {
+      free(line);
+      continue;
+    }
+    config.confdetails[config.size] = (struct confentry *) malloc(
+            sizeof(struct confentry));
+    if(config.confdetails[config.size] == NULL) {
+      fprintf(LOGFILE,
+          "Failed allocating memory for single configuration item\n");
+      goto cleanup;
+    }
+
+    #ifdef DEBUG
+      fprintf(LOGFILE, "read_config : Adding conf key : %s \n", equaltok);
+    #endif
+
+    memset(config.confdetails[config.size], 0, sizeof(struct confentry));
+    config.confdetails[config.size]->key = (char *) malloc(
+            sizeof(char) * (strlen(equaltok)+1));
+    strcpy((char *)config.confdetails[config.size]->key, equaltok);
+    equaltok = strtok_r(NULL, "=", &temp_equaltok);
+    if (equaltok == NULL) {
+      fprintf(LOGFILE, "configuration tokenization failed \n");
+      goto cleanup;
+    }
+    //means value is commented so don't store the key
+    if(equaltok[0] == '#') {
+      free(line);
+      free((void *)config.confdetails[config.size]->key);
+      free(config.confdetails[config.size]);
+      continue;
+    }
+
+    #ifdef DEBUG
+      fprintf(LOGFILE, "read_config : Adding conf value : %s \n", equaltok);
+    #endif
+
+    config.confdetails[config.size]->value = (char *) malloc(
+            sizeof(char) * (strlen(equaltok)+1));
+    strcpy((char *)config.confdetails[config.size]->value, equaltok);
+    if((config.size + 1) % MAX_SIZE  == 0) {
+      config.confdetails = (struct confentry **) realloc(config.confdetails,
+          sizeof(struct confentry **) * (MAX_SIZE + config.size));
+      if (config.confdetails == NULL) {
+        fprintf(LOGFILE,
+            "Failed re-allocating memory for configuration items\n");
+        goto cleanup;
+      }
+    }
+    if(config.confdetails[config.size] )
+    config.size++;
+    free(line);
+  }
+
+  //close the file
+  fclose(conf_file);
+
+  if (config.size == 0) {
+    fprintf(LOGFILE, "Invalid configuration provided in %s\n", file_name);
+    exit(INVALID_CONFIG_FILE);
+  }
+  //clean up allocated file name
+  return;
+  //free spaces alloced.
+  cleanup:
+  if (line != NULL) {
+    free(line);
+  }
+  fclose(conf_file);
+  free_configurations();
+  return;
+}
+
+/*
+ * function used to get a configuration value.
+ * The function for the first time populates the configuration details into
+ * array, next time onwards used the populated array.
+ *
+ */
+char * get_value(const char* key) {
+  int count;
+  for (count = 0; count < config.size; count++) {
+    if (strcmp(config.confdetails[count]->key, key) == 0) {
+      return strdup(config.confdetails[count]->value);
+    }
+  }
+  return NULL;
+}
+
+/**
+ * Function to return an array of values for a key.
+ * Value delimiter is assumed to be a comma.
+ */
+char ** get_values(const char * key) {
+  char ** toPass = NULL;
+  char *value = get_value(key);
+  char *tempTok = NULL;
+  char *tempstr = NULL;
+  int size = 0;
+  int toPassSize = MAX_SIZE;
+
+  //first allocate an array of 10
+  if(value != NULL) {
+    toPass = (char **) malloc(sizeof(char *) * toPassSize);
+    tempTok = strtok_r((char *)value, ",", &tempstr);
+    while (tempTok != NULL) {
+      toPass[size++] = tempTok;
+      if(size == toPassSize) {
+        toPassSize += MAX_SIZE;
+        toPass = (char **) realloc(toPass,(sizeof(char *) *
+                                           (MAX_SIZE * toPassSize)));
+      }
+      tempTok = strtok_r(NULL, ",", &tempstr);
+    }
+  }
+  if (size > 0) {
+    toPass[size] = NULL;
+  }
+  return toPass;
+}
+
+// free an entry set of values
+void free_values(char** values) {
+  if (*values != NULL) {
+    free(*values);
+  }
+  if (values != NULL) {
+    free(values);
+  }
+}

+ 42 - 0
src/c++/task-controller/impl/configuration.h

@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Ensure that the configuration file and all of the containing directories
+ * are only writable by root. Otherwise, an attacker can change the 
+ * configuration and potentially cause damage.
+ * returns 0 if permissions are ok
+ */
+int check_configuration_permissions(const char* file_name);
+
+// read the given configuration file
+void read_config(const char* config_file);
+
+//method exposed to get the configurations
+char *get_value(const char* key);
+
+//function to return array of values pointing to the key. Values are
+//comma-separated strings.
+char ** get_values(const char* key);
+
+// free the memory returned by get_values
+void free_values(char** values);
+
+//method to free allocated configuration
+void free_configurations();
+
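Together, configuration.c and the header above give the task-controller a small key=value reader: '#' starts a comment and a value may be a comma-separated list. A hedged usage sketch against this API follows; it assumes compilation with impl/configuration.c only, so LOGFILE (normally defined in task-controller.c) is defined locally for the sketch, and the path and the group key are illustrative (mapred.local.dir is the key the controller itself reads as TT_SYS_DIR_KEY).

```c
/* Usage sketch for the configuration reader declared above. Assumes it is
 * built together with impl/configuration.c alone; LOGFILE is defined here
 * because the real definition lives in task-controller.c. The config path
 * and the group key are illustrative. */
#include <stdio.h>
#include <stdlib.h>
#include "configuration.h"

FILE *LOGFILE;          /* configuration.c logs through this handle */

int main(void) {
  LOGFILE = stderr;
  const char *conf = "/etc/hadoop/taskcontroller.cfg";   /* illustrative path */

  if (check_configuration_permissions(conf) != 0) {
    fprintf(stderr, "%s must be root-owned and not group/world writable\n", conf);
    return 1;
  }
  read_config(conf);                       /* exits on a missing/malformed file */

  char *group = get_value("mapreduce.tasktracker.group");  /* illustrative key */
  if (group != NULL) {
    printf("tasktracker group: %s\n", group);
    free(group);                           /* get_value() returns a strdup'd copy */
  }

  char **dirs = get_values("mapred.local.dir");   /* comma-separated list */
  if (dirs != NULL) {
    char **d;
    for (d = dirs; *d != NULL; ++d) {
      printf("local dir: %s\n", *d);
    }
    free_values(dirs);
  }

  free_configurations();
  return 0;
}
```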

+ 196 - 0
src/c++/task-controller/impl/main.c

@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "task-controller.h"
+
+#include <errno.h>
+#include <grp.h>
+#include <limits.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#define _STRINGIFY(X) #X
+#define STRINGIFY(X) _STRINGIFY(X)
+#define CONF_FILENAME "taskcontroller.cfg"
+
+void display_usage(FILE *stream) {
+  fprintf(stream,
+      "Usage: task-controller user command command-args\n");
+  fprintf(stream, "Commands:\n");
+  fprintf(stream, "   initialize job:       %2d jobid credentials cmd args\n",
+	  INITIALIZE_JOB);
+  fprintf(stream, "   launch task:          %2d jobid taskid task-script\n",
+	  LAUNCH_TASK_JVM);
+  fprintf(stream, "   signal task:          %2d task-pid signal\n",
+	  SIGNAL_TASK);
+  fprintf(stream, "   delete as user:       %2d relative-path\n",
+	  DELETE_AS_USER);
+  fprintf(stream, "   delete log:           %2d relative-path\n",
+	  DELETE_LOG_AS_USER);
+  fprintf(stream, "   run command as user:  %2d cmd args\n",
+	  RUN_COMMAND_AS_USER);
+}
+
+int main(int argc, char **argv) {
+  //Minimum number of arguments required to run the task-controller
+  if (argc < 4) {
+    display_usage(stdout);
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  LOGFILE = stdout;
+  int command;
+  const char * job_id = NULL;
+  const char * task_id = NULL;
+  const char * cred_file = NULL;
+  const char * script_file = NULL;
+  const char * current_dir = NULL;
+  const char * job_xml = NULL;
+
+  int exit_code = 0;
+
+  char * dir_to_be_deleted = NULL;
+
+  char *executable_file = get_executable();
+
+#ifndef HADOOP_CONF_DIR
+  #error HADOOP_CONF_DIR must be defined
+#endif
+
+  char *orig_conf_file = STRINGIFY(HADOOP_CONF_DIR) "/" CONF_FILENAME;
+  char *conf_file = realpath(orig_conf_file, NULL);
+
+  if (conf_file == NULL) {
+    fprintf(LOGFILE, "Configuration file %s not found.\n", orig_conf_file);
+    return INVALID_CONFIG_FILE;
+  }
+  if (check_configuration_permissions(conf_file) != 0) {
+    return INVALID_CONFIG_FILE;
+  }
+  read_config(conf_file);
+  free(conf_file);
+
+  // look up the task tracker group in the config file
+  char *tt_group = get_value(TT_GROUP_KEY);
+  if (tt_group == NULL) {
+    fprintf(LOGFILE, "Can't get configured value for %s.\n", TT_GROUP_KEY);
+    exit(INVALID_CONFIG_FILE);
+  }
+  struct group *group_info = getgrnam(tt_group);
+  if (group_info == NULL) {
+    fprintf(LOGFILE, "Can't get group information for %s - %s.\n", tt_group,
+            strerror(errno));
+    exit(INVALID_CONFIG_FILE);
+  }
+  set_tasktracker_uid(getuid(), group_info->gr_gid);
+  // if we are running from a setuid executable, make the real uid root
+  setuid(0);
+  // set the real and effective group id to the task tracker group
+  setgid(group_info->gr_gid);
+
+  if (check_taskcontroller_permissions(executable_file) != 0) {
+    fprintf(LOGFILE, "Invalid permissions on task-controller binary.\n");
+    return INVALID_TASKCONTROLLER_PERMISSIONS;
+  }
+
+  //checks done for user name
+  if (argv[optind] == NULL) {
+    fprintf(LOGFILE, "Invalid user name \n");
+    return INVALID_USER_NAME;
+  }
+  int ret = set_user(argv[optind]);
+  if (ret != 0) {
+    return ret;
+  }
+
+  optind = optind + 1;
+  command = atoi(argv[optind++]);
+
+  fprintf(LOGFILE, "main : command provided %d\n",command);
+  fprintf(LOGFILE, "main : user is %s\n", user_detail->pw_name);
+
+  switch (command) {
+  case INITIALIZE_JOB:
+    if (argc < 7) {
+      fprintf(LOGFILE, "Too few arguments (%d vs 7) for initialize job\n",
+	      argc);
+      return INVALID_ARGUMENT_NUMBER;
+    }
+    job_id = argv[optind++];
+    cred_file = argv[optind++];
+    job_xml = argv[optind++];
+    exit_code = initialize_job(user_detail->pw_name, job_id, cred_file,
+                               job_xml, argv + optind);
+    break;
+  case LAUNCH_TASK_JVM:
+    if (argc < 7) {
+      fprintf(LOGFILE, "Too few arguments (%d vs 7) for launch task\n",
+	      argc);
+      return INVALID_ARGUMENT_NUMBER;
+    }
+    job_id = argv[optind++];
+    task_id = argv[optind++];
+    current_dir = argv[optind++];
+    script_file = argv[optind++];
+    exit_code = run_task_as_user(user_detail->pw_name, job_id, task_id, 
+                                 current_dir, script_file);
+    break;
+  case SIGNAL_TASK:
+    if (argc < 5) {
+      fprintf(LOGFILE, "Too few arguments (%d vs 5) for signal task\n",
+	      argc);
+      return INVALID_ARGUMENT_NUMBER;
+    } else {
+      char* end_ptr = NULL;
+      char* option = argv[optind++];
+      int task_pid = strtol(option, &end_ptr, 10);
+      if (option == end_ptr || *end_ptr != '\0') {
+        fprintf(LOGFILE, "Illegal argument for task pid %s\n", option);
+        return INVALID_ARGUMENT_NUMBER;
+      }
+      option = argv[optind++];
+      int signal = strtol(option, &end_ptr, 10);
+      if (option == end_ptr || *end_ptr != '\0') {
+        fprintf(LOGFILE, "Illegal argument for signal %s\n", option);
+        return INVALID_ARGUMENT_NUMBER;
+      }
+      exit_code = signal_user_task(user_detail->pw_name, task_pid, signal);
+    }
+    break;
+  case DELETE_AS_USER:
+    dir_to_be_deleted = argv[optind++];
+    exit_code= delete_as_user(user_detail->pw_name, dir_to_be_deleted);
+    break;
+  case DELETE_LOG_AS_USER:
+    dir_to_be_deleted = argv[optind++];
+    exit_code= delete_log_directory(dir_to_be_deleted);
+    break;
+  case RUN_COMMAND_AS_USER:
+    exit_code = run_command_as_user(user_detail->pw_name, argv + optind);
+    break;
+  default:
+    exit_code = INVALID_COMMAND_PROVIDED;
+  }
+  fclose(LOGFILE);
+  return exit_code;
+}
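main() dispatches on a numeric command after switching to the configured tasktracker group, so a caller (for example the TaskTracker) invokes the setuid binary with argv laid out exactly as display_usage() prints it: user, numeric command, then the command's arguments. A hedged sketch of one such invocation via fork/exec; the binary path, user name, job/attempt ids and script path are hypothetical, and the command code is taken by name from task-controller.h rather than hard-coded.

```c
/* Sketch of invoking the setuid task-controller to launch a task JVM,
 * following the argv layout read by main() above: user, command code,
 * job id, task id, current dir, task script. All concrete values here
 * (paths, user, ids) are hypothetical. */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "task-controller.h"   /* for the LAUNCH_TASK_JVM command code */

int launch_task(void) {
  char command[16];
  snprintf(command, sizeof(command), "%d", LAUNCH_TASK_JVM);

  pid_t pid = fork();
  if (pid == 0) {
    execl("/usr/local/hadoop/bin/task-controller", "task-controller",
          "someuser",                               /* run the task as this user */
          command,                                  /* numeric command code      */
          "job_201101010000_0001",                  /* job id                    */
          "attempt_201101010000_0001_m_000000_0",   /* task/attempt id           */
          "/path/to/attempt/work/dir",              /* current dir for the task  */
          "/path/to/attempt/work/dir/taskjvm.sh",   /* task script               */
          (char *)NULL);
    _exit(127);                                     /* exec failed */
  }
  int status = 0;
  waitpid(pid, &status, 0);
  return WIFEXITED(status) ? WEXITSTATUS(status) : -1;
}
```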

+ 1062 - 0
src/c++/task-controller/impl/task-controller.c

@@ -0,0 +1,1062 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "task-controller.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <fts.h>
+#include <errno.h>
+#include <grp.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#define USER_DIR_PATTERN "%s/taskTracker/%s"
+
+#define TT_JOB_DIR_PATTERN USER_DIR_PATTERN "/jobcache/%s"
+
+#define ATTEMPT_DIR_PATTERN TT_JOB_DIR_PATTERN "/%s/work"
+
+#define TASK_SCRIPT "taskjvm.sh"
+
+#define TT_LOCAL_TASK_DIR_PATTERN    "%s/taskTracker/%s/jobcache/%s/%s"
+
+#define TT_SYS_DIR_KEY "mapred.local.dir"
+
+#define TT_LOG_DIR_KEY "hadoop.log.dir"
+
+#define JOB_FILENAME "job.xml"
+
+#define CREDENTIALS_FILENAME "jobToken"
+
+#define MIN_USERID_KEY "min.user.id"
+
+static const int DEFAULT_MIN_USERID = 1000;
+
+#define BANNED_USERS_KEY "banned.users"
+
+static const char* DEFAULT_BANNED_USERS[] = {"mapred", "hdfs", "bin", 0};
+
+//struct to store the user details
+struct passwd *user_detail = NULL;
+
+FILE* LOGFILE = NULL;
+
+static uid_t tt_uid = -1;
+static gid_t tt_gid = -1;
+
+void set_tasktracker_uid(uid_t user, gid_t group) {
+  tt_uid = user;
+  tt_gid = group;
+}
+
+/**
+ * get the executable filename.
+ */
+char* get_executable() {
+  char buffer[PATH_MAX];
+  snprintf(buffer, PATH_MAX, "/proc/%u/exe", getpid());
+  char *filename = malloc(PATH_MAX);
+  ssize_t len = readlink(buffer, filename, PATH_MAX);
+  if (len == -1) {
+    fprintf(stderr, "Can't get executable name from %s - %s\n", buffer,
+            strerror(errno));
+    exit(-1);
+  } else if (len >= PATH_MAX) {
+    fprintf(LOGFILE, "Executable name %.*s is longer than %d characters.\n",
+            PATH_MAX, filename, PATH_MAX);
+    exit(-1);
+  }
+  filename[len] = '\0';
+  return filename;
+}
+
+/**
+ * Check the permissions on taskcontroller to make sure that security is
+ * promisable. For this, we need task-controller binary to
+ *    * be user-owned by root
+ *    * be group-owned by a configured special group.
+ *    * others do not have any permissions
+ *    * be setuid/setgid
+ */
+int check_taskcontroller_permissions(char *executable_file) {
+
+  errno = 0;
+  char * resolved_path = realpath(executable_file, NULL);
+  if (resolved_path == NULL) {
+    fprintf(LOGFILE,
+        "Error resolving the canonical name for the executable : %s!",
+        strerror(errno));
+    return -1;
+  }
+
+  struct stat filestat;
+  errno = 0;
+  if (stat(resolved_path, &filestat) != 0) {
+    fprintf(LOGFILE, 
+            "Could not stat the executable : %s!.\n", strerror(errno));
+    return -1;
+  }
+
+  uid_t binary_euid = filestat.st_uid; // Binary's user owner
+  gid_t binary_gid = filestat.st_gid; // Binary's group owner
+
+  // Effective uid should be root
+  if (binary_euid != 0) {
+    fprintf(LOGFILE,
+        "The task-controller binary should be user-owned by root.\n");
+    return -1;
+  }
+
+  if (binary_gid != getgid()) {
+    fprintf(LOGFILE, "The configured tasktracker group %d is different from"
+            " the group of the executable %d\n", getgid(), binary_gid);
+    return -1;
+  }
+
+  // check others do not have read/write/execute permissions
+  if ((filestat.st_mode & S_IROTH) == S_IROTH || (filestat.st_mode & S_IWOTH)
+      == S_IWOTH || (filestat.st_mode & S_IXOTH) == S_IXOTH) {
+    fprintf(LOGFILE,
+            "The task-controller binary should not have read or write or"
+            " execute for others.\n");
+    return -1;
+  }
+
+  // Binary should be setuid/setgid executable
+  if ((filestat.st_mode & S_ISUID) == 0) {
+    fprintf(LOGFILE, "The task-controller binary should be set setuid.\n");
+    return -1;
+  }
+
+  return 0;
+}
+
+/**
+ * Change the effective user id to limit damage.
+ */
+static int change_effective_user(uid_t user, gid_t group) {
+  if (geteuid() == user) {
+    return 0;
+  }
+  if (seteuid(0) != 0) {
+    return -1;
+  }
+  if (setegid(group) != 0) {
+    fprintf(LOGFILE, "Failed to set effective group id %d - %s\n", group,
+            strerror(errno));
+    return -1;
+  }
+  if (seteuid(user) != 0) {
+    fprintf(LOGFILE, "Failed to set effective user id %d - %s\n", user,
+            strerror(errno));
+    return -1;
+  }
+  return 0;
+}
+
+/**
+ * Change the real and effective user and group to abandon the super user
+ * privileges.
+ */
+int change_user(uid_t user, gid_t group) {
+  if (user == getuid() && user == geteuid() && 
+      group == getgid() && group == getegid()) {
+    return 0;
+  }
+
+  if (seteuid(0) != 0) {
+    fprintf(LOGFILE, "unable to reacquire root - %s\n", strerror(errno));
+    fprintf(LOGFILE, "Real: %d:%d; Effective: %d:%d\n",
+	    getuid(), getgid(), geteuid(), getegid());
+    return SETUID_OPER_FAILED;
+  }
+  if (setgid(group) != 0) {
+    fprintf(LOGFILE, "unable to set group to %d - %s\n", group, 
+            strerror(errno));
+    fprintf(LOGFILE, "Real: %d:%d; Effective: %d:%d\n",
+	    getuid(), getgid(), geteuid(), getegid());
+    return SETUID_OPER_FAILED;
+  }
+  if (setuid(user) != 0) {
+    fprintf(LOGFILE, "unable to set user to %d - %s\n", user, strerror(errno));
+    fprintf(LOGFILE, "Real: %d:%d; Effective: %d:%d\n",
+	    getuid(), getgid(), geteuid(), getegid());
+    return SETUID_OPER_FAILED;
+  }
+
+  return 0;
+}
+
+/**
+ * Utility function to concatenate argB to argA using the concat_pattern.
+ */
+char *concatenate(char *concat_pattern, char *return_path_name, 
+                  int numArgs, ...) {
+  va_list ap;
+  va_start(ap, numArgs);
+  int strlen_args = 0;
+  char *arg = NULL;
+  int j;
+  for (j = 0; j < numArgs; j++) {
+    arg = va_arg(ap, char*);
+    if (arg == NULL) {
+      fprintf(LOGFILE, "One of the arguments passed for %s is null.\n",
+          return_path_name);
+      return NULL;
+    }
+    strlen_args += strlen(arg);
+  }
+  va_end(ap);
+
+  char *return_path = NULL;
+  int str_len = strlen(concat_pattern) + strlen_args + 1;
+
+  return_path = (char *) malloc(str_len);
+  if (return_path == NULL) {
+    fprintf(LOGFILE, "Unable to allocate memory for %s.\n", return_path_name);
+    return NULL;
+  }
+  va_start(ap, numArgs);
+  vsnprintf(return_path, str_len, concat_pattern, ap);
+  va_end(ap);
+  return return_path;
+}
+
+/**
+ * Get the job-directory path from tt_root, user name and job-id
+ */
+char *get_job_directory(const char * tt_root, const char *user,
+                        const char *jobid) {
+  return concatenate(TT_JOB_DIR_PATTERN, "job_dir_path", 3, tt_root, user,
+      jobid);
+}
+
+/**
+ * Get the user directory of a particular user
+ */
+char *get_user_directory(const char *tt_root, const char *user) {
+  return concatenate(USER_DIR_PATTERN, "user_dir_path", 2, tt_root, user);
+}
+
+char *get_job_work_directory(const char *job_dir) {
+  return concatenate("%s/work", "job work", 1, job_dir);
+}
+
+/**
+ * Get the attempt directory for the given attempt_id
+ */
+char *get_attempt_work_directory(const char *tt_root, const char *user,
+				 const char *job_id, const char *attempt_id) {
+  return concatenate(ATTEMPT_DIR_PATTERN, "attempt_dir_path", 4,
+                     tt_root, user, job_id, attempt_id);
+}
+
+char *get_task_launcher_file(const char* work_dir) {
+  return concatenate("%s/%s", "task launcher", 2, work_dir, TASK_SCRIPT);
+}
+
+/**
+ * Get the job log directory.
+ * Ensures that the result is a realpath and that it is underneath the 
+ * tt log root.
+ */
+char* get_job_log_directory(const char* jobid) {
+  char* log_dir = get_value(TT_LOG_DIR_KEY);
+  if (log_dir == NULL) {
+    fprintf(LOGFILE, "Log directory %s is not configured.\n", TT_LOG_DIR_KEY);
+    return NULL;
+  }
+  char *result = concatenate("%s/userlogs/%s", "job log dir", 2, log_dir, 
+                             jobid);
+  if (result == NULL) {
+    fprintf(LOGFILE, "failed to get memory in get_job_log_directory for %s"
+            " and %s\n", log_dir, jobid);
+  }
+  free(log_dir);
+  return result;
+}
+
+/*
+ * Get a user subdirectory.
+ */
+char *get_user_subdirectory(const char *tt_root,
+                            const char *user,
+                            const char *subdir) {
+  char * user_dir = get_user_directory(tt_root, user);
+  char * result = concatenate("%s/%s", "user subdir", 2,
+                              user_dir, subdir);
+  free(user_dir);
+  return result;
+}
+
+/**
+ * Ensure that the given path and all of the parent directories are created
+ * with the desired permissions.
+ */
+int mkdirs(const char* path, mode_t perm) {
+  char *buffer = strdup(path);
+  char *token;
+  int cwd = open("/", O_RDONLY);
+  if (cwd == -1) {
+    fprintf(LOGFILE, "Can't open / in %s - %s\n", path, strerror(errno));
+    free(buffer);
+    return -1;
+  }
+  for(token = strtok(buffer, "/"); token != NULL; token = strtok(NULL, "/")) {
+    if (mkdirat(cwd, token, perm) != 0) {
+      if (errno != EEXIST) {
+        fprintf(LOGFILE, "Can't create directory %s in %s - %s\n", 
+                token, path, strerror(errno));
+        close(cwd);
+        free(buffer);
+        return -1;
+      }
+    }
+    int new_dir = openat(cwd, token, O_RDONLY);
+    close(cwd);
+    cwd = new_dir;
+    if (cwd == -1) {
+      fprintf(LOGFILE, "Can't open %s in %s - %s\n", token, path, 
+              strerror(errno));
+      free(buffer);
+      return -1;
+    }
+  }
+  free(buffer);
+  close(cwd);
+  return 0;
+}
+
+/**
+ * Function to prepare the attempt directories for the task JVM.
+ * It creates the task work and log directories.
+ */
+static int create_attempt_directories(const char* user, const char *job_id, 
+					const char *task_id) {
+  // create dirs as 0750
+  const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP;
+  if (job_id == NULL || task_id == NULL || user == NULL) {
+    fprintf(LOGFILE,
+            "Either job_id, task_id or the user passed is null.\n");
+    return -1;
+  }
+  int result = 0;
+
+  char **local_dir = get_values(TT_SYS_DIR_KEY);
+
+  if (local_dir == NULL) {
+    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
+    return -1;
+  }
+
+  char **local_dir_ptr;
+  for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) {
+    char *task_dir = get_attempt_work_directory(*local_dir_ptr, user, job_id, 
+                                                task_id);
+    if (task_dir == NULL) {
+      free_values(local_dir);
+      return -1;
+    }
+    if (mkdirs(task_dir, perms) != 0) {
+      // continue on to create other task directories
+      free(task_dir);
+    } else {
+      free(task_dir);
+    }
+  }
+  free_values(local_dir);
+
+  // also make the directory for the task logs
+  char *job_task_name = malloc(strlen(job_id) + strlen(task_id) + 2);
+  if (job_task_name == NULL) {
+    fprintf(LOGFILE, "Malloc of job task name failed\n");
+    result = -1;
+  } else {
+    sprintf(job_task_name, "%s/%s", job_id, task_id);
+    char *log_dir = get_job_log_directory(job_task_name);
+    free(job_task_name);
+    if (log_dir == NULL) {
+      result = -1;
+    } else if (mkdirs(log_dir, perms) != 0) {
+      result = -1;
+    }
+    free(log_dir);
+  }
+  return result;
+}
+
+/**
+ * Load the user information for a given user name.
+ */
+static struct passwd* get_user_info(const char* user) {
+  int string_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+  void* buffer = malloc(string_size + sizeof(struct passwd));
+  struct passwd *result = NULL;
+  if (getpwnam_r(user, buffer, buffer + sizeof(struct passwd), string_size,
+		 &result) != 0) {
+    free(buffer);
+    fprintf(LOGFILE, "Can't get user information %s - %s\n", user,
+	    strerror(errno));
+    return NULL;
+  }
+  return result;
+}
+
+/**
+ * Is the user a real user account?
+ * Checks:
+ *   1. Not root
+ *   2. UID is above the minimum configured.
+ *   3. Not in banned user list
+ * Returns NULL on failure
+ */
+struct passwd* check_user(const char *user) {
+  if (strcmp(user, "root") == 0) {
+    fprintf(LOGFILE, "Running as root is not allowed\n");
+    return NULL;
+  }
+  char *min_uid_str = get_value(MIN_USERID_KEY);
+  int min_uid = DEFAULT_MIN_USERID;
+  if (min_uid_str != NULL) {
+    char *end_ptr = NULL;
+    min_uid = strtol(min_uid_str, &end_ptr, 10);
+    if (min_uid_str == end_ptr || *end_ptr != '\0') {
+      fprintf(LOGFILE, "Illegal value of %s for %s in configuration\n", 
+	      min_uid_str, MIN_USERID_KEY);
+      free(min_uid_str);
+      return NULL;
+    }
+    free(min_uid_str);
+  }
+  struct passwd *user_info = get_user_info(user);
+  if (NULL == user_info) {
+    fprintf(LOGFILE, "User %s not found\n", user);
+    return NULL;
+  }
+  if (user_info->pw_uid < min_uid) {
+    fprintf(LOGFILE, "Requested user %s has id %d, which is below the "
+	    "minimum allowed %d\n", user, user_info->pw_uid, min_uid);
+    free(user_info);
+    return NULL;
+  }
+  char **banned_users = get_values(BANNED_USERS_KEY);
+  char **banned_user = (banned_users == NULL) ? 
+    (char**) DEFAULT_BANNED_USERS : banned_users;
+  for(; *banned_user; ++banned_user) {
+    if (strcmp(*banned_user, user) == 0) {
+      free(user_info);
+      fprintf(LOGFILE, "Requested user %s is banned\n", user);
+      return NULL;
+    }
+  }
+  if (banned_users != NULL) {
+    free_values(banned_users);
+  }
+  return user_info;
+}
+
+/**
+ * Function used to populate the user_detail structure.
+ */
+int set_user(const char *user) {
+  // free any old user
+  if (user_detail != NULL) {
+    free(user_detail);
+    user_detail = NULL;
+  }
+  user_detail = check_user(user);
+  if (user_detail == NULL) {
+    return -1;
+  }
+  return change_effective_user(user_detail->pw_uid, user_detail->pw_gid);
+}
+
+/**
+ * Change the ownership of the given file or directory to the new user.
+ */
+static int change_owner(const char* path, uid_t user, gid_t group) {
+  if (geteuid() == user && getegid() == group) {
+    return 0;
+  } else {
+    uid_t old_user = geteuid();
+    gid_t old_group = getegid();
+    if (change_effective_user(0, group) != 0) {
+      return -1;
+    }
+    if (chown(path, user, group) != 0) {
+      fprintf(LOGFILE, "Can't chown %s to %d:%d - %s\n", path, user, group,
+	      strerror(errno));
+      return -1;
+    }
+    return change_effective_user(old_user, old_group);
+  }
+}
+
+/**
+ * Create a top level directory for the user.
+ * It assumes that the parent directory is *not* writable by the user.
+ * It creates directories with 02750 permissions owned by the user
+ * and with the group set to the task tracker group.
+ * return non-0 on failure
+ */
+int create_directory_for_user(const char* path) {
+  // set 2750 permissions and group sticky bit
+  mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP | S_ISGID;
+  uid_t user = geteuid();
+  gid_t group = getegid();
+  int ret = 0;
+  ret = change_effective_user(tt_uid, tt_gid);
+  if (ret == 0) {
+    if (mkdir(path, permissions) == 0) {
+      // need to reassert the group sticky bit
+      if (chmod(path, permissions) != 0) {
+        fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n",
+                path, strerror(errno));
+        ret = -1;
+      } else if (change_owner(path, user, tt_gid) != 0) {
+        ret = -1;
+      }
+    } else if (errno == EEXIST) {
+      struct stat file_stat;
+      if (stat(path, &file_stat) != 0) {
+        fprintf(LOGFILE, "Can't stat directory %s - %s\n", path, 
+                strerror(errno));
+        ret = -1;
+      } else {
+        if (file_stat.st_uid != user ||
+            file_stat.st_gid != tt_gid) {
+          fprintf(LOGFILE, "Directory %s owned by wrong user or group. "
+                  "Expected %d:%d and found %d:%d.\n",
+                  path, user, tt_gid, file_stat.st_uid, file_stat.st_gid);
+          ret = -1;
+        }
+      }
+    } else {
+      fprintf(LOGFILE, "Failed to create directory %s - %s\n", path,
+              strerror(errno));
+      ret = -1;
+    }
+  }
+  if (change_effective_user(user, group) != 0) {
+    ret = -1;
+  }
+  return ret;
+}
+                            
+/**
+ * Open a file as the tasktracker and return a file descriptor for it.
+ * Returns -1 on error
+ */
+static int open_file_as_task_tracker(const char* filename) {
+  uid_t user = geteuid();
+  gid_t group = getegid();
+  if (change_effective_user(tt_uid, tt_gid) != 0) {
+    return -1;
+  }
+  int result = open(filename, O_RDONLY);
+  if (result == -1) {
+    fprintf(LOGFILE, "Can't open file %s as task tracker - %s\n", filename,
+	    strerror(errno));
+  }
+  if (change_effective_user(user, group)) {
+    result = -1;
+  }
+  return result;
+}
+
+/**
+ * Copy a file from a fd to a given filename.
+ * The new file must not exist and it is created with permissions perm.
+ * The input stream is closed.
+ * Return 0 if everything is ok.
+ */
+static int copy_file(int input, const char* in_filename, 
+		     const char* out_filename, mode_t perm) {
+  const int buffer_size = 128*1024;
+  char buffer[buffer_size];
+  int out_fd = open(out_filename, O_WRONLY|O_CREAT|O_EXCL|O_NOFOLLOW, perm);
+  if (out_fd == -1) {
+    fprintf(LOGFILE, "Can't open %s for output - %s\n", out_filename, 
+            strerror(errno));
+    return -1;
+  }
+  ssize_t len = read(input, buffer, buffer_size);
+  while (len > 0) {
+    ssize_t pos = 0;
+    while (pos < len) {
+      ssize_t write_result = write(out_fd, buffer + pos, len - pos);
+      if (write_result <= 0) {
+	fprintf(LOGFILE, "Error writing to %s - %s\n", out_filename,
+		strerror(errno));
+	close(out_fd);
+	return -1;
+      }
+      pos += write_result;
+    }
+    len = read(input, buffer, buffer_size);
+  }
+  if (len < 0) {
+    fprintf(LOGFILE, "Failed to read file %s - %s\n", in_filename, 
+	    strerror(errno));
+    close(out_fd);
+    return -1;
+  }
+  if (close(out_fd) != 0) {
+    fprintf(LOGFILE, "Failed to close file %s - %s\n", out_filename, 
+	    strerror(errno));
+    return -1;
+  }
+  close(input);
+  return 0;
+}
+
+/**
+ * Function to initialize the user directories of a user.
+ */
+int initialize_user(const char *user) {
+  char **local_dir = get_values(TT_SYS_DIR_KEY);
+  if (local_dir == NULL) {
+    fprintf(LOGFILE, "%s is not configured.\n", TT_SYS_DIR_KEY);
+    return INVALID_TT_ROOT;
+  }
+
+  char *user_dir;
+  char **local_dir_ptr = local_dir;
+  int failed = 0;
+  for(local_dir_ptr = local_dir; *local_dir_ptr != 0; ++local_dir_ptr) {
+    user_dir = get_user_directory(*local_dir_ptr, user);
+    if (user_dir == NULL) {
+      fprintf(LOGFILE, "Couldn't get user directory for %s.\n", user);
+      failed = 1;
+      break;
+    }
+    if (create_directory_for_user(user_dir) != 0) {
+      failed = 1;
+    }
+    free(user_dir);
+  }
+  free_values(local_dir);
+  return failed ? INITIALIZE_USER_FAILED : 0;
+}
+
+/**
+ * Function to prepare the job directories for the task JVM.
+ */
+int initialize_job(const char *user, const char *jobid, 
+		   const char* credentials, const char* job_xml,
+                   char* const* args) {
+  if (jobid == NULL || user == NULL) {
+    fprintf(LOGFILE, "Either jobid is null or the user passed is null.\n");
+    return INVALID_ARGUMENT_NUMBER;
+  }
+
+  // create the user directory
+  int result = initialize_user(user);
+  if (result != 0) {
+    return result;
+  }
+
+  // create the log directory for the job
+  char *job_log_dir = get_job_log_directory(jobid);
+  if (job_log_dir == NULL) {
+    return -1;
+  }
+  result = create_directory_for_user(job_log_dir);
+  free(job_log_dir);
+  if (result != 0) {
+    return -1;
+  }
+
+  // open up the credentials file
+  int cred_file = open_file_as_task_tracker(credentials);
+  if (cred_file == -1) {
+    return -1;
+  }
+
+  int job_file = open_file_as_task_tracker(job_xml);
+  if (job_file == -1) {
+    return -1;
+  }
+
+  // give up root privs
+  if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+    return -1;
+  }
+
+  // 750
+  mode_t permissions = S_IRWXU | S_IRGRP | S_IXGRP;
+  char **tt_roots = get_values(TT_SYS_DIR_KEY);
+
+  if (tt_roots == NULL) {
+    return INVALID_CONFIG_FILE;
+  }
+
+  char **tt_root;
+  char *primary_job_dir = NULL;
+  for(tt_root=tt_roots; *tt_root != NULL; ++tt_root) {
+    char *job_dir = get_job_directory(*tt_root, user, jobid);
+    if (job_dir == NULL) {
+      // try the next one
+    } else if (mkdirs(job_dir, permissions) != 0) {
+      free(job_dir);
+    } else if (primary_job_dir == NULL) {
+      primary_job_dir = job_dir;
+    } else {
+      free(job_dir);
+    }
+  }
+  free_values(tt_roots);
+  if (primary_job_dir == NULL) {
+    fprintf(LOGFILE, "Did not create any job directories\n");
+    return -1;
+  }
+
+  char *cred_file_name = concatenate("%s/%s", "cred file", 2,
+				     primary_job_dir, CREDENTIALS_FILENAME);
+  if (cred_file_name == NULL) {
+    return -1;
+  }
+  if (copy_file(cred_file, credentials, cred_file_name, S_IRUSR|S_IWUSR) != 0){
+    return -1;
+  }
+  char *job_file_name = concatenate("%s/%s", "job file", 2,
+				     primary_job_dir, JOB_FILENAME);
+  if (job_file_name == NULL) {
+    return -1;
+  }
+  if (copy_file(job_file, job_xml, job_file_name,
+        S_IRUSR|S_IWUSR|S_IRGRP) != 0) {
+    return -1;
+  }
+  fclose(stdin);
+  fflush(LOGFILE);
+  if (LOGFILE != stdout) {
+    fclose(stdout);
+  }
+  fclose(stderr);
+  chdir(primary_job_dir);
+  execvp(args[0], args);
+  fprintf(LOGFILE, "Failure to exec job initialization process - %s\n",
+	  strerror(errno));
+  return -1;
+}
+
+/*
+ * Function used to launch a task as the provided user. It does the following :
+ * 1) Creates attempt work dir and log dir to be accessible by the child
+ * 2) Copies the script file from the TT to the work directory
+ * 3) Sets up the environment
+ * 4) Does an execlp on the same in order to replace the current image with
+ *    task image.
+ */
+int run_task_as_user(const char *user, const char *job_id, 
+                     const char *task_id, const char *work_dir,
+                     const char *script_name) {
+  int exit_code = -1;
+  char *task_script_path = NULL;
+  if (create_attempt_directories(user, job_id, task_id) != 0) {
+    goto cleanup;
+  }
+  int task_file_source = open_file_as_task_tracker(script_name);
+  if (task_file_source == -1) {
+    goto cleanup;
+  }
+  task_script_path = get_task_launcher_file(work_dir);
+  if (task_script_path == NULL) {
+    exit_code = OUT_OF_MEMORY;
+    goto cleanup;
+  }
+  if (copy_file(task_file_source, script_name,task_script_path,S_IRWXU) != 0) {
+    goto cleanup;
+  }
+
+  //change the user
+  fcloseall();
+  umask(0027);
+  if (chdir(work_dir) != 0) {
+    fprintf(LOGFILE, "Can't change directory to %s -%s\n", work_dir,
+	    strerror(errno));
+    goto cleanup;
+  }
+  if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+    exit_code = SETUID_OPER_FAILED;
+    goto cleanup;
+  }
+
+  if (execlp(task_script_path, task_script_path, NULL) != 0) {
+    fprintf(LOGFILE, "Couldn't execute the task jvm file %s - %s", 
+            task_script_path, strerror(errno));
+    exit_code = UNABLE_TO_EXECUTE_TASK_SCRIPT;
+    goto cleanup;
+  }
+  exit_code = 0;
+
+ cleanup:
+  free(task_script_path);
+  return exit_code;
+}
+
+/**
+ * Function used to signal a task launched by the user.
+ * The function sends appropriate signal to the process group
+ * specified by the task_pid.
+ */
+int signal_user_task(const char *user, int pid, int sig) {
+  if(pid <= 0) {
+    return INVALID_TASK_PID;
+  }
+
+  if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+    return SETUID_OPER_FAILED;
+  }
+
+  //Don't continue if the process-group is not alive anymore.
+  int has_group = 1;
+  if (kill(-pid,0) < 0) {
+    if (kill(pid, 0) < 0) {
+      if (errno == ESRCH) {
+        return INVALID_TASK_PID;
+      }
+      fprintf(LOGFILE, "Error signalling task %d with %d - %s\n",
+	      pid, sig, strerror(errno));
+      return -1;
+    } else {
+      has_group = 0;
+    }
+  }
+
+  if (kill((has_group ? -1 : 1) * pid, sig) < 0) {
+    if(errno != ESRCH) {
+      fprintf(LOGFILE, 
+              "Error signalling process group %d with signal %d - %s\n", 
+              -pid, sig, strerror(errno));
+      return UNABLE_TO_KILL_TASK;
+    } else {
+      return INVALID_TASK_PID;
+    }
+  }
+  fprintf(LOGFILE, "Killing process %s%d with %d\n",
+	  (has_group ? "group " :""), pid, sig);
+  return 0;
+}
+
+/**
+ * Delete the top-level directory as the task tracker user.
+ */
+static int rmdir_as_tasktracker(const char* path) {
+  int user_uid = geteuid();
+  int user_gid = getegid();
+  int ret = change_effective_user(tt_uid, tt_gid);
+  if (ret == 0) {
+    if (rmdir(path) != 0) {
+      fprintf(LOGFILE, "rmdir of %s failed - %s\n", path, strerror(errno));
+      ret = -1;
+    }
+  }
+  // always change back
+  if (change_effective_user(user_uid, user_gid) != 0) {
+    ret = -1;
+  }
+  return ret;
+}
+
+/**
+ * Recursively delete the given path.
+ * full_path : the path to delete
+ * needs_tt_user: the top level directory must be deleted by the tt user.
+ */
+static int delete_path(const char *full_path, 
+                       int needs_tt_user) {
+  int exit_code = 0;
+
+  if (full_path == NULL) {
+    fprintf(LOGFILE, "Path is null\n");
+    exit_code = UNABLE_TO_BUILD_PATH; // malloc may have failed
+  } else {
+    char *(paths[]) = {strdup(full_path), 0};
+    if (paths[0] == NULL) {
+      fprintf(LOGFILE, "Malloc failed in delete_path\n");
+      return -1;
+    }
+    // check to make sure the directory exists
+    if (access(full_path, F_OK) != 0) {
+      if (errno == ENOENT) {
+        free(paths[0]);
+        return 0;
+      }
+    }
+    FTS* tree = fts_open(paths, FTS_PHYSICAL | FTS_XDEV, NULL);
+    FTSENT* entry = NULL;
+    int ret = 0;
+
+    if (tree == NULL) {
+      fprintf(LOGFILE,
+              "Cannot open file traversal structure for the path %s:%s.\n", 
+              full_path, strerror(errno));
+      free(paths[0]);
+      return -1;
+    }
+    while (((entry = fts_read(tree)) != NULL) && exit_code == 0) {
+      switch (entry->fts_info) {
+
+      case FTS_DP:        // A directory being visited in post-order
+        if (!needs_tt_user ||
+            strcmp(entry->fts_path, full_path) != 0) {
+          if (rmdir(entry->fts_accpath) != 0) {
+            fprintf(LOGFILE, "Couldn't delete directory %s - %s\n", 
+                    entry->fts_path, strerror(errno));
+            exit_code = -1;
+          }
+        }
+        break;
+
+      case FTS_F:         // A regular file
+      case FTS_SL:        // A symbolic link
+      case FTS_SLNONE:    // A broken symbolic link
+      case FTS_DEFAULT:   // Unknown type of file
+        if (unlink(entry->fts_accpath) != 0) {
+          fprintf(LOGFILE, "Couldn't delete file %s - %s\n", entry->fts_path,
+                  strerror(errno));
+          exit_code = -1;
+        }
+        break;
+
+      case FTS_DNR:       // Unreadable directory
+        fprintf(LOGFILE, "Unreadable directory %s. Skipping..\n", 
+                entry->fts_path);
+        break;
+
+      case FTS_D:         // A directory in pre-order
+        // if the directory isn't writable by the owner, chmod it so its contents can be deleted
+        if ((entry->fts_statp->st_mode & 0200) == 0) {
+          fprintf(LOGFILE, "Non-writable directory %s, chmoding.\n",
+                  entry->fts_path);
+          if (chmod(entry->fts_accpath, 0700) != 0) {
+            fprintf(LOGFILE, "Error chmoding %s - %s, continuing\n", 
+                    entry->fts_path, strerror(errno));
+          }
+        }
+        break;
+
+      case FTS_NS:        // A file with no stat(2) information
+        // usually a root directory that doesn't exist
+        fprintf(LOGFILE, "Directory not found %s\n", entry->fts_path);
+        break;
+
+      case FTS_DC:        // A directory that causes a cycle
+      case FTS_DOT:       // A dot directory
+      case FTS_NSOK:      // No stat information requested
+        break;
+
+      case FTS_ERR:       // Error return
+        fprintf(LOGFILE, "Error traversing directory %s - %s\n", 
+                entry->fts_path, strerror(entry->fts_errno));
+        exit_code = -1;
+        break;
+      default:
+        exit_code = -1;
+        break;
+      }
+    }
+    ret = fts_close(tree);
+    if (exit_code == 0 && ret != 0) {
+      fprintf(LOGFILE, "Error in fts_close while deleting %s\n", full_path);
+      exit_code = -1;
+    }
+    if (needs_tt_user) {
+      // Finally, remove the top-level directory as the task tracker user.
+      // That handles the case where the top level directory is in a directory
+      // that is owned by the task tracker.
+      exit_code = rmdir_as_tasktracker(full_path);
+    }
+    free(paths[0]);
+  }
+  return exit_code;
+}
+
+/**
+ * Delete the given directory as the user from each of the tt_root directories
+ * user: the user doing the delete
+ * subdir: the subdir to delete
+ */
+int delete_as_user(const char *user,
+                   const char *subdir) {
+  int ret = 0;
+
+  char** tt_roots = get_values(TT_SYS_DIR_KEY);
+  char** ptr;
+  if (tt_roots == NULL || *tt_roots == NULL) {
+    fprintf(LOGFILE, "No %s defined in the configuration\n", TT_SYS_DIR_KEY);
+    return INVALID_CONFIG_FILE;
+  }
+
+  // do the delete
+  for(ptr = tt_roots; *ptr != NULL; ++ptr) {
+    char* full_path = get_user_subdirectory(*ptr, user, subdir);
+    if (full_path == NULL) {
+      return -1;
+    }
+    int this_ret = delete_path(full_path, strlen(subdir) == 0);
+    free(full_path);
+    // delete as much as we can, but remember the error
+    if (this_ret != 0) {
+      ret = this_ret;
+    }
+  }
+  free_values(tt_roots);
+  return ret;
+}
+
+/**
+ * delete a given log directory
+ */
+int delete_log_directory(const char *subdir) {
+  char* log_subdir = get_job_log_directory(subdir);
+  int ret = -1;
+  if (log_subdir != NULL) {
+    ret = delete_path(log_subdir, strchr(subdir, '/') == NULL);
+  }
+  free(log_subdir);
+  return ret;
+}
+
+/**
+ * run command as user
+ */
+int run_command_as_user(const char *user, char* const* args) {
+  if (user == NULL) {
+    fprintf(LOGFILE, "The user passed is null.\n");
+    return INVALID_ARGUMENT_NUMBER;
+  }
+  // give up root privs
+  if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+    return -1;
+  }
+  execvp(args[0], args);
+  fprintf(LOGFILE, "Failure to exec command - %s\n",
+	  strerror(errno));
+  return -1;
+} 
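
As a quick orientation, here is a short illustrative sketch (not part of the patch itself) of how the path helpers defined in this file compose. The expected strings below are copied from the unit tests in test-task-controller.c added later in this commit; every helper returns a heap-allocated string the caller must free, and the LOGFILE definition here stands in for the one provided by impl/main.c:

/* Illustrative only -- expected shapes per test-task-controller.c:
 *   get_user_directory("/tmp", "user")
 *     -> "/tmp/taskTracker/user"
 *   get_job_directory("/tmp", "user", "job_200906101234_0001")
 *     -> "/tmp/taskTracker/user/jobcache/job_200906101234_0001"
 *   get_attempt_work_directory("/tmp", "owen", "job_1", "attempt_1")
 *     -> "/tmp/taskTracker/owen/jobcache/job_1/attempt_1/work"
 */
#include <stdio.h>
#include <stdlib.h>
#include "task-controller.h"

FILE *LOGFILE;   /* in the real binary this is defined by impl/main.c */

int main(void) {
  LOGFILE = stdout;
  char *work_dir = get_attempt_work_directory("/tmp", "owen", "job_1",
                                              "attempt_1");
  if (work_dir != NULL) {
    printf("%s\n", work_dir);   /* prints the .../attempt_1/work path */
    free(work_dir);
  }
  return 0;
}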

+ 154 - 0
src/c++/task-controller/impl/task-controller.h

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <pwd.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+//command definitions
+enum command {
+  INITIALIZE_JOB = 0,
+  LAUNCH_TASK_JVM = 1,
+  SIGNAL_TASK = 2,
+  DELETE_AS_USER = 3,
+  DELETE_LOG_AS_USER = 4,
+  RUN_COMMAND_AS_USER = 5
+};
+
+enum errorcodes {
+  INVALID_ARGUMENT_NUMBER = 1,
+  INVALID_USER_NAME, //2
+  INVALID_COMMAND_PROVIDED, //3
+  SUPER_USER_NOT_ALLOWED_TO_RUN_TASKS, //4
+  INVALID_TT_ROOT, //5
+  SETUID_OPER_FAILED, //6
+  UNABLE_TO_EXECUTE_TASK_SCRIPT, //7
+  UNABLE_TO_KILL_TASK, //8
+  INVALID_TASK_PID, //9
+  ERROR_RESOLVING_FILE_PATH, //10
+  RELATIVE_PATH_COMPONENTS_IN_FILE_PATH, //11
+  UNABLE_TO_STAT_FILE, //12
+  FILE_NOT_OWNED_BY_TASKTRACKER, //13
+  PREPARE_ATTEMPT_DIRECTORIES_FAILED, //14
+  INITIALIZE_JOB_FAILED, //15
+  PREPARE_TASK_LOGS_FAILED, //16
+  INVALID_TT_LOG_DIR, //17
+  OUT_OF_MEMORY, //18
+  INITIALIZE_DISTCACHEFILE_FAILED, //19
+  INITIALIZE_USER_FAILED, //20
+  UNABLE_TO_BUILD_PATH, //21
+  INVALID_TASKCONTROLLER_PERMISSIONS, //22
+  PREPARE_JOB_LOGS_FAILED, //23
+  INVALID_CONFIG_FILE, // 24
+};
+
+#define TT_GROUP_KEY "mapreduce.tasktracker.group"
+
+extern struct passwd *user_detail;
+
+// the log file for error messages
+extern FILE *LOGFILE;
+
+// get the executable's filename
+char* get_executable();
+
+int check_taskcontroller_permissions(char *executable_file);
+
+/**
+ * delete a given log directory as a user
+ */
+int delete_log_directory(const char *log_dir);
+
+// initialize the job directory
+int initialize_job(const char *user, const char *jobid,
+                   const char *credentials, 
+                   const char *job_xml, char* const* args);
+
+// run the task as the user
+int run_task_as_user(const char * user, const char *jobid, const char *taskid,
+                     const char *work_dir, const char *script_name);
+
+// send a signal as the user
+int signal_user_task(const char *user, int pid, int sig);
+
+// delete a directory (or file) recursively as the user.
+int delete_as_user(const char *user,
+                   const char *dir_to_be_deleted);
+
+// run a command as the user
+int run_command_as_user(const char *user,
+                        char* const* args); 
+
+// set the task tracker's uid and gid
+void set_tasktracker_uid(uid_t user, gid_t group);
+
+/**
+ * Is the user a real user account?
+ * Checks:
+ *   1. Not root
+ *   2. UID is above the minimum configured.
+ *   3. Not in banned user list
+ * Returns NULL on failure
+ */
+struct passwd* check_user(const char *user);
+
+// set the user
+int set_user(const char *user);
+
+// methods to get the directories
+
+char *get_user_directory(const char *tt_root, const char *user);
+
+char *get_job_directory(const char * tt_root, const char *user,
+                        const char *jobid);
+
+char *get_attempt_work_directory(const char *tt_root, const char *user,
+				 const char *job_dir, const char *attempt_id);
+
+char *get_task_launcher_file(const char* work_dir);
+
+/**
+ * Get the job log directory.
+ * Ensures that the result is a realpath and that it is underneath the 
+ * tt log root.
+ */
+char* get_job_log_directory(const char* jobid);
+
+char *get_task_log_dir(const char *log_dir, const char *job_id, 
+                       const char *attempt_id);
+
+/**
+ * Ensure that the given path and all of the parent directories are created
+ * with the desired permissions.
+ */
+int mkdirs(const char* path, mode_t perm);
+
+/**
+ * Function to initialize the user directories of a user.
+ */
+int initialize_user(const char *user);
+
+/**
+ * Create a top level directory for the user.
+ * It assumes that the parent directory is *not* writable by the user.
+ * It creates directories with 02750 permissions owned by the user
+ * and with the group set to the task tracker group.
+ * return non-0 on failure
+ */
+int create_directory_for_user(const char* path);
+
+int change_user(uid_t user, gid_t group);
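
For orientation, a small illustrative dispatch sketch showing how the command enum and the entry points above fit together. This is not the actual impl/main.c added by this patch, which also validates arguments, checks the caller, and calls set_user() before dispatching:

/* Illustrative only -- maps a command code to the corresponding entry
 * point declared above; error values come from enum errorcodes. */
static int dispatch(enum command cmd, const char *user, char * const *args) {
  switch (cmd) {
  case DELETE_AS_USER:
    return delete_as_user(user, args[0]);
  case DELETE_LOG_AS_USER:
    return delete_log_directory(args[0]);
  case RUN_COMMAND_AS_USER:
    return run_command_as_user(user, args);
  default:
    return INVALID_COMMAND_PROVIDED;
  }
}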

+ 763 - 0
src/c++/task-controller/test/test-task-controller.c

@@ -0,0 +1,763 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "configuration.h"
+#include "task-controller.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#define TEST_ROOT "/tmp/test-task-controller"
+#define DONT_TOUCH_FILE "dont-touch-me"
+
+static char* username = NULL;
+
+/**
+ * Run the command using the effective user id.
+ * It can't use system, since bash seems to copy the real user id into the
+ * effective id.
+ */
+void run(const char *cmd) {
+  fflush(stdout);
+  fflush(stderr);
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: failed to fork - %s\n", strerror(errno));
+  } else if (child == 0) {
+    char *cmd_copy = strdup(cmd);
+    char *ptr;
+    int words = 1;
+    for(ptr = strchr(cmd_copy, ' ');  ptr; ptr = strchr(ptr+1, ' ')) {
+      words += 1;
+    }
+    char **argv = malloc(sizeof(char *) * (words + 1));
+    ptr = strtok(cmd_copy, " ");
+    int i = 0;
+    argv[i++] = ptr;
+    while (ptr != NULL) {
+      ptr = strtok(NULL, " ");
+      argv[i++] = ptr;
+    }
+    if (execvp(argv[0], argv) != 0) {
+      printf("FAIL: exec failed in child %s - %s\n", cmd, strerror(errno));
+      exit(42);
+    }
+  } else {
+    int status = 0;
+    if (waitpid(child, &status, 0) <= 0) {
+      printf("FAIL: failed waiting for child process %s pid %d - %s\n", 
+	     cmd, child, strerror(errno));
+      exit(1);
+    }
+    if (!WIFEXITED(status)) {
+      printf("FAIL: process %s pid %d did not exit\n", cmd, child);
+      exit(1);
+    }
+    if (WEXITSTATUS(status) != 0) {
+      printf("FAIL: process %s pid %d exited with error status %d\n", cmd, 
+	     child, WEXITSTATUS(status));
+      exit(1);
+    }
+  }
+}
+
+int write_config_file(char *file_name) {
+  FILE *file;
+  file = fopen(file_name, "w");
+  if (file == NULL) {
+    printf("Failed to open %s.\n", file_name);
+    return EXIT_FAILURE;
+  }
+  fprintf(file, "mapred.local.dir=" TEST_ROOT "/local-1");
+  int i;
+  for(i=2; i < 5; ++i) {
+    fprintf(file, "," TEST_ROOT "/local-%d", i);
+  }
+  fprintf(file, "\n");
+  fprintf(file, "hadoop.log.dir=" TEST_ROOT "/logs\n");
+  fclose(file);
+  return 0;
+}
+
+void create_tt_roots() {
+  char** tt_roots = get_values("mapred.local.dir");
+  char** tt_root;
+  for(tt_root=tt_roots; *tt_root != NULL; ++tt_root) {
+    if (mkdir(*tt_root, 0755) != 0) {
+      printf("FAIL: Can't create directory %s - %s\n", *tt_root,
+	     strerror(errno));
+      exit(1);
+    }
+    char buffer[100000];
+    sprintf(buffer, "%s/taskTracker", *tt_root);
+    if (mkdir(buffer, 0755) != 0) {
+      printf("FAIL: Can't create directory %s - %s\n", buffer,
+	     strerror(errno));
+      exit(1);
+    }
+  }
+  free_values(tt_roots);
+}
+
+void test_get_user_directory() {
+  char *user_dir = get_user_directory("/tmp", "user");
+  char *expected = "/tmp/taskTracker/user";
+  if (strcmp(user_dir, expected) != 0) {
+    printf("test_get_user_directory expected %s got %s\n", expected, user_dir);
+    exit(1);
+  }
+  free(user_dir);
+}
+
+void test_get_job_directory() {
+  char *expected = "/tmp/taskTracker/user/jobcache/job_200906101234_0001";
+  char *job_dir = (char *) get_job_directory("/tmp", "user",
+      "job_200906101234_0001");
+  if (strcmp(job_dir, expected) != 0) {
+    exit(1);
+  }
+  free(job_dir);
+}
+
+void test_get_attempt_directory() {
+  char *attempt_dir = get_attempt_work_directory("/tmp", "owen", "job_1",
+						 "attempt_1");
+  char *expected = "/tmp/taskTracker/owen/jobcache/job_1/attempt_1/work";
+  if (strcmp(attempt_dir, expected) != 0) {
+    printf("Fail get_attempt_work_directory got %s expected %s\n",
+	   attempt_dir, expected);
+  }
+  free(attempt_dir);
+}
+
+void test_get_task_launcher_file() {
+  char *expected_file = ("/tmp/taskTracker/user/jobcache/job_200906101234_0001"
+			 "/taskjvm.sh");
+  char *job_dir = get_job_directory("/tmp", "user",
+                                    "job_200906101234_0001");
+  char *task_file =  get_task_launcher_file(job_dir);
+  if (strcmp(task_file, expected_file) != 0) {
+    printf("failure to match expected task file %s vs %s\n", task_file,
+           expected_file);
+    exit(1);
+  }
+  free(job_dir);
+  free(task_file);
+}
+
+void test_get_job_log_dir() {
+  char *expected = TEST_ROOT "/logs/userlogs/job_200906101234_0001";
+  char *logdir = get_job_log_directory("job_200906101234_0001");
+  if (strcmp(logdir, expected) != 0) {
+    printf("Fail get_job_log_dir got %s expected %s\n", logdir, expected);
+    exit(1);
+  }
+  free(logdir);
+}
+
+void test_get_task_log_dir() {
+  char *logdir = get_job_log_directory("job_5/task_4");
+  char *expected = TEST_ROOT "/logs/userlogs/job_5/task_4";
+  if (strcmp(logdir, expected) != 0) {
+    printf("FAIL: get_task_log_dir expected %s got %s\n", expected, logdir);
+  }
+  free(logdir);
+}
+
+void test_check_user() {
+  printf("\nTesting test_check_user\n");
+  struct passwd *user = check_user(username);
+  if (user == NULL) {
+    printf("FAIL: failed check for user %s\n", username);
+    exit(1);
+  }
+  free(user);
+  if (check_user("lp") != NULL) {
+    printf("FAIL: failed check for system user lp\n");
+    exit(1);
+  }
+  if (check_user("root") != NULL) {
+    printf("FAIL: failed check for system user root\n");
+    exit(1);
+  }
+  if (check_user("mapred") != NULL) {
+    printf("FAIL: failed check for hadoop user mapred\n");
+    exit(1);
+  }
+}
+
+void test_check_configuration_permissions() {
+  printf("\nTesting check_configuration_permissions\n");
+  if (check_configuration_permissions("/etc/passwd") != 0) {
+    printf("FAIL: failed permission check on /etc/passwd\n");
+    exit(1);
+  }
+  if (check_configuration_permissions(TEST_ROOT) == 0) {
+    printf("FAIL: failed permission check on %s\n", TEST_ROOT);
+    exit(1);
+  }
+}
+
+void test_delete_task() {
+  if (initialize_user(username)) {
+    printf("FAIL: failed to initialize user %s\n", username);
+    exit(1);
+  }
+  char* job_dir = get_job_directory(TEST_ROOT "/local-2", username, "job_1");
+  char* dont_touch = get_job_directory(TEST_ROOT "/local-2", username, 
+                                       DONT_TOUCH_FILE);
+  char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-2", 
+					      username, "job_1", "task_1");
+  char buffer[100000];
+  sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", task_dir);
+  run(buffer);
+  sprintf(buffer, "touch %s", dont_touch);
+  run(buffer);
+
+  // soft link to the canary file from the task directory
+  sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, task_dir);
+  run(buffer);
+  // hard link to the canary file from the task directory
+  sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, task_dir);
+  run(buffer);
+  // create a dot file in the task directory
+  sprintf(buffer, "touch %s/who/let/.dotfile", task_dir);
+  run(buffer);
+  // create a no permission file
+  sprintf(buffer, "touch %s/who/let/protect", task_dir);
+  run(buffer);
+  sprintf(buffer, "chmod 000 %s/who/let/protect", task_dir);
+  run(buffer);
+  // create a no permission directory
+  sprintf(buffer, "chmod 000 %s/who/let", task_dir);
+  run(buffer);
+
+  // delete task directory
+  int ret = delete_as_user(username, "jobcache/job_1/task_1");
+  if (ret != 0) {
+    printf("FAIL: return code from delete_as_user is %d\n", ret);
+    exit(1);
+  }
+
+  // check to make sure the task directory is gone
+  if (access(task_dir, R_OK) == 0) {
+    printf("FAIL: failed to delete the directory - %s\n", task_dir);
+    exit(1);
+  }
+  // check to make sure the job directory is not gone
+  if (access(job_dir, R_OK) != 0) {
+    printf("FAIL: accidentally deleted the directory - %s\n", job_dir);
+    exit(1);
+  }
+  // but that the canary is not gone
+  if (access(dont_touch, R_OK) != 0) {
+    printf("FAIL: accidentally deleted file %s\n", dont_touch);
+    exit(1);
+  }
+  sprintf(buffer, "chmod -R 700 %s", job_dir);
+  run(buffer);
+  sprintf(buffer, "rm -fr %s", job_dir);
+  run(buffer);
+  free(job_dir);
+  free(task_dir);
+  free(dont_touch);
+}
+
+void test_delete_job() {
+  char* job_dir = get_job_directory(TEST_ROOT "/local-2", username, "job_2");
+  char* dont_touch = get_job_directory(TEST_ROOT "/local-2", username, 
+                                       DONT_TOUCH_FILE);
+  char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-2", 
+					      username, "job_2", "task_1");
+  char buffer[100000];
+  sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", task_dir);
+  run(buffer);
+  sprintf(buffer, "touch %s", dont_touch);
+  run(buffer);
+
+  // soft link to the canary file from the task directory
+  sprintf(buffer, "ln -s %s %s/who/softlink", dont_touch, task_dir);
+  run(buffer);
+  // hard link to the canary file from the task directory
+  sprintf(buffer, "ln %s %s/who/hardlink", dont_touch, task_dir);
+  run(buffer);
+  // create a dot file in the task directory
+  sprintf(buffer, "touch %s/who/let/.dotfile", task_dir);
+  run(buffer);
+  // create a no permission file
+  sprintf(buffer, "touch %s/who/let/protect", task_dir);
+  run(buffer);
+  sprintf(buffer, "chmod 000 %s/who/let/protect", task_dir);
+  run(buffer);
+  // create a no permission directory
+  sprintf(buffer, "chmod 000 %s/who/let", task_dir);
+  run(buffer);
+
+  // delete task directory
+  int ret = delete_as_user(username, "jobcache/job_2");
+  if (ret != 0) {
+    printf("FAIL: return code from delete_as_user is %d\n", ret);
+    exit(1);
+  }
+
+  // check to make sure the task directory is gone
+  if (access(task_dir, R_OK) == 0) {
+    printf("FAIL: failed to delete the directory - %s\n", task_dir);
+    exit(1);
+  }
+  // check to make sure the job directory is gone
+  if (access(job_dir, R_OK) == 0) {
+    printf("FAIL: didn't delete the directory - %s\n", job_dir);
+    exit(1);
+  }
+  // but that the canary is not gone
+  if (access(dont_touch, R_OK) != 0) {
+    printf("FAIL: accidentally deleted file %s\n", dont_touch);
+    exit(1);
+  }
+  free(job_dir);
+  free(task_dir);
+  free(dont_touch);
+}
+
+
+void test_delete_user() {
+  printf("\nTesting delete_user\n");
+  char* job_dir = get_job_directory(TEST_ROOT "/local-1", username, "job_3");
+  if (mkdirs(job_dir, 0700) != 0) {
+    exit(1);
+  }
+  char buffer[100000];
+  sprintf(buffer, "%s/local-1/taskTracker/%s", TEST_ROOT, username);
+  if (access(buffer, R_OK) != 0) {
+    printf("FAIL: directory missing before test\n");
+    exit(1);
+  }
+  if (delete_as_user(username, "") != 0) {
+    exit(1);
+  }
+  if (access(buffer, R_OK) == 0) {
+    printf("FAIL: directory not deleted\n");
+    exit(1);
+  }
+  if (access(TEST_ROOT "/local-1", R_OK) != 0) {
+    printf("FAIL: local-1 directory does not exist\n");
+    exit(1);
+  }
+  free(job_dir);
+}
+
+void test_delete_log_directory() {
+  printf("\nTesting delete_log_directory\n");
+  char *job_log_dir = get_job_log_directory("job_1");
+  if (job_log_dir == NULL) {
+    exit(1);
+  }
+  if (create_directory_for_user(job_log_dir) != 0) {
+    exit(1);
+  }
+  free(job_log_dir);
+  char *task_log_dir = get_job_log_directory("job_1/task_2");
+  if (task_log_dir == NULL) {
+    exit(1);
+  }
+  if (mkdirs(task_log_dir, 0700) != 0) {
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_1/task_2", R_OK) != 0) {
+    printf("FAIL: can't access task directory - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (delete_log_directory("job_1/task_2") != 0) {
+    printf("FAIL: can't delete task directory\n");
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_1/task_2", R_OK) == 0) {
+    printf("FAIL: task directory not deleted\n");
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_1", R_OK) != 0) {
+    printf("FAIL: job directory is missing - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (delete_log_directory("job_1") != 0) {
+    printf("FAIL: can't delete job directory\n");
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_1", R_OK) == 0) {
+    printf("FAIL: job directory not deleted\n");
+    exit(1);
+  }
+  free(task_log_dir);
+}
+
+void run_test_in_child(const char* test_name, void (*func)()) {
+  printf("\nRunning test %s in child process\n", test_name);
+  fflush(stdout);
+  fflush(stderr);
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: fork failed\n");
+    exit(1);
+  } else if (child == 0) {
+    func();
+    exit(0);
+  } else {
+    int status = 0;
+    if (waitpid(child, &status, 0) == -1) {
+      printf("FAIL: waitpid %d failed - %s\n", child, strerror(errno));
+      exit(1);
+    }
+    if (!WIFEXITED(status)) {
+      printf("FAIL: child %d didn't exit - %d\n", child, status);
+      exit(1);
+    }
+    if (WEXITSTATUS(status) != 0) {
+      printf("FAIL: child %d exited with bad status %d\n",
+	     child, WEXITSTATUS(status));
+      exit(1);
+    }
+  }
+}
+
+void test_signal_task() {
+  printf("\nTesting signal_task\n");
+  fflush(stdout);
+  fflush(stderr);
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: fork failed\n");
+    exit(1);
+  } else if (child == 0) {
+    if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+      exit(1);
+    }
+    sleep(3600);
+    exit(0);
+  } else {
+    printf("Child task launched as %d\n", child);
+    if (signal_user_task(username, child, SIGQUIT) != 0) {
+      exit(1);
+    }
+    int status = 0;
+    if (waitpid(child, &status, 0) == -1) {
+      printf("FAIL: waitpid failed - %s\n", strerror(errno));
+      exit(1);
+    }
+    if (!WIFSIGNALED(status)) {
+      printf("FAIL: child wasn't signalled - %d\n", status);
+      exit(1);
+    }
+    if (WTERMSIG(status) != SIGQUIT) {
+      printf("FAIL: child was killed with %d instead of %d\n", 
+	     WTERMSIG(status), SIGQUIT);
+      exit(1);
+    }
+  }
+}
+
+void test_signal_task_group() {
+  printf("\nTesting group signal_task\n");
+  fflush(stdout);
+  fflush(stderr);
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: fork failed\n");
+    exit(1);
+  } else if (child == 0) {
+    setpgrp();
+    if (change_user(user_detail->pw_uid, user_detail->pw_gid) != 0) {
+      exit(1);
+    }
+    sleep(3600);
+    exit(0);
+  }
+  printf("Child task launched as %d\n", child);
+  if (signal_user_task(username, child, SIGKILL) != 0) {
+    exit(1);
+  }
+  int status = 0;
+  if (waitpid(child, &status, 0) == -1) {
+    printf("FAIL: waitpid failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (!WIFSIGNALED(status)) {
+    printf("FAIL: child wasn't signalled - %d\n", status);
+    exit(1);
+  }
+  if (WTERMSIG(status) != SIGKILL) {
+    printf("FAIL: child was killed with %d instead of %d\n", 
+	   WTERMSIG(status), SIGKILL);
+    exit(1);
+  }
+}
+
+void test_init_job() {
+  printf("\nTesting init job\n");
+  if (seteuid(0) != 0) {
+    printf("FAIL: seteuid to root failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  FILE* creds = fopen(TEST_ROOT "/creds.txt", "w");
+  if (creds == NULL) {
+    printf("FAIL: failed to create credentials file - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fprintf(creds, "secret key\n") < 0) {
+    printf("FAIL: fprintf failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fclose(creds) != 0) {
+    printf("FAIL: fclose failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  FILE* job_xml = fopen(TEST_ROOT "/job.xml", "w");
+  if (job_xml == NULL) {
+    printf("FAIL: failed to create job file - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fprintf(job_xml, "<jobconf/>\n") < 0) {
+    printf("FAIL: fprintf failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fclose(job_xml) != 0) {
+    printf("FAIL: fclose failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (seteuid(user_detail->pw_uid) != 0) {
+    printf("FAIL: failed to seteuid back to user - %s\n", strerror(errno));
+    exit(1);
+  }
+  fflush(stdout);
+  fflush(stderr);
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: failed to fork process for init_job - %s\n", 
+	   strerror(errno));
+    exit(1);
+  } else if (child == 0) {
+    char *final_pgm[] = {"touch", "my-touch-file", 0};
+    if (initialize_job(username, "job_4", TEST_ROOT "/creds.txt", 
+                       TEST_ROOT "/job.xml", final_pgm) != 0) {
+      printf("FAIL: failed in child\n");
+      exit(42);
+    }
+    // should never return
+    exit(1);
+  }
+  int status = 0;
+  if (waitpid(child, &status, 0) <= 0) {
+    printf("FAIL: failed waiting for process %d - %s\n", child, 
+	   strerror(errno));
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_4", R_OK) != 0) {
+    printf("FAIL: failed to create job log directory\n");
+    exit(1);
+  }
+  char* job_dir = get_job_directory(TEST_ROOT "/local-1", username, "job_4");
+  if (access(job_dir, R_OK) != 0) {
+    printf("FAIL: failed to create job directory %s\n", job_dir);
+    exit(1);
+  }
+  char buffer[100000];
+  sprintf(buffer, "%s/jobToken", job_dir);
+  if (access(buffer, R_OK) != 0) {
+    printf("FAIL: failed to create credentials %s\n", buffer);
+    exit(1);
+  }
+  sprintf(buffer, "%s/my-touch-file", job_dir);
+  if (access(buffer, R_OK) != 0) {
+    printf("FAIL: failed to create touch file %s\n", buffer);
+    exit(1);
+  }
+  free(job_dir);
+  job_dir = get_job_log_directory("job_4");
+  if (access(job_dir, R_OK) != 0) {
+    printf("FAIL: failed to create job log directory %s\n", job_dir);
+    exit(1);
+  }
+  free(job_dir);
+}
+
+void test_run_task() {
+  printf("\nTesting run task\n");
+  if (seteuid(0) != 0) {
+    printf("FAIL: seteuid to root failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  const char* script_name = TEST_ROOT "/task-script";
+  FILE* script = fopen(script_name, "w");
+  if (script == NULL) {
+    printf("FAIL: failed to create script file - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (seteuid(user_detail->pw_uid) != 0) {
+    printf("FAIL: failed to seteuid back to user - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fprintf(script, "#!/bin/bash\n"
+                     "touch foobar\n"
+                     "exit 0") < 0) {
+    printf("FAIL: fprintf failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  if (fclose(script) != 0) {
+    printf("FAIL: fclose failed - %s\n", strerror(errno));
+    exit(1);
+  }
+  fflush(stdout);
+  fflush(stderr);
+  char* task_dir = get_attempt_work_directory(TEST_ROOT "/local-1", 
+					      username, "job_4", "task_1");
+  pid_t child = fork();
+  if (child == -1) {
+    printf("FAIL: failed to fork process for run_task - %s\n",
+	   strerror(errno));
+    exit(1);
+  } else if (child == 0) {
+    if (run_task_as_user(username, "job_4", "task_1", 
+                         task_dir, script_name) != 0) {
+      printf("FAIL: failed in child\n");
+      exit(42);
+    }
+    // should never return
+    exit(1);
+  }
+  int status = 0;
+  if (waitpid(child, &status, 0) <= 0) {
+    printf("FAIL: failed waiting for process %d - %s\n", child, 
+	   strerror(errno));
+    exit(1);
+  }
+  if (access(TEST_ROOT "/logs/userlogs/job_4/task_1", R_OK) != 0) {
+    printf("FAIL: failed to create task log directory\n");
+    exit(1);
+  }
+  if (access(task_dir, R_OK) != 0) {
+    printf("FAIL: failed to create task directory %s\n", task_dir);
+    exit(1);
+  }
+  char buffer[100000];
+  sprintf(buffer, "%s/foobar", task_dir);
+  if (access(buffer, R_OK) != 0) {
+    printf("FAIL: failed to create touch file %s\n", buffer);
+    exit(1);
+  }
+  free(task_dir);
+  task_dir = get_job_log_directory("job_4/task_1");
+  if (access(task_dir, R_OK) != 0) {
+    printf("FAIL: failed to create task log directory %s\n", task_dir);
+    exit(1);
+  }
+  free(task_dir);
+}
+
+int main(int argc, char **argv) {
+  LOGFILE = stdout;
+  int my_username = 0;
+
+  // clean up any junk from previous run
+  system("chmod -R u=rwx " TEST_ROOT "; rm -fr " TEST_ROOT);
+  
+  if (mkdirs(TEST_ROOT "/logs/userlogs", 0755) != 0) {
+    exit(1);
+  }
+  
+  if (write_config_file(TEST_ROOT "/test.cfg") != 0) {
+    exit(1);
+  }
+  read_config(TEST_ROOT "/test.cfg");
+
+  create_tt_roots();
+
+  if (getuid() == 0 && argc == 2) {
+    username = argv[1];
+  } else {
+    username = strdup(getpwuid(getuid())->pw_name);
+    my_username = 1;
+  }
+  set_tasktracker_uid(geteuid(), getegid());
+
+  if (set_user(username)) {
+    exit(1);
+  }
+
+  printf("\nStarting tests\n");
+
+  printf("\nTesting get_user_directory()\n");
+  test_get_user_directory();
+
+  printf("\nTesting get_job_directory()\n");
+  test_get_job_directory();
+
+  printf("\nTesting get_attempt_directory()\n");
+  test_get_attempt_directory();
+
+  printf("\nTesting get_task_launcher_file()\n");
+  test_get_task_launcher_file();
+
+  printf("\nTesting get_job_log_dir()\n");
+  test_get_job_log_dir();
+
+  test_check_configuration_permissions();
+
+  printf("\nTesting get_task_log_dir()\n");
+  test_get_task_log_dir();
+
+  printf("\nTesting delete_task()\n");
+  test_delete_task();
+
+  printf("\nTesting delete_job()\n");
+  test_delete_job();
+
+  test_delete_user();
+
+  test_check_user();
+
+  test_delete_log_directory();
+
+  // the tests that change user need to be run in a subshell, so that
+  // when they change user they don't give up our privs
+  run_test_in_child("test_signal_task", test_signal_task);
+  run_test_in_child("test_signal_task_group", test_signal_task_group);
+
+  // init job and run task can't be run if you aren't testing as root
+  if (getuid() == 0) {
+    // these tests do internal forks so that the change_owner and execs
+    // don't mess up our process.
+    test_init_job();
+    test_run_task();
+  }
+
+  seteuid(0);
+  run("rm -fr " TEST_ROOT);
+  printf("\nFinished tests\n");
+
+  if (my_username) {
+    free(username);
+  }
+  free_configurations();
+  return 0;
+}

+ 14 - 10
src/c++/utils/Makefile.in

@@ -1,4 +1,4 @@
-# Makefile.in generated by automake 1.9 from Makefile.am.
+# Makefile.in generated by automake 1.9.2 from Makefile.am.
 # @configure_input@
 
 # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
@@ -37,13 +37,12 @@ POST_INSTALL = :
 NORMAL_UNINSTALL = :
 PRE_UNINSTALL = :
 POST_UNINSTALL = :
+build_triplet = @build@
 host_triplet = @host@
-DIST_COMMON = config.guess config.guess config.sub config.sub \
-	$(srcdir)/Makefile.in $(srcdir)/Makefile.am \
-	$(top_srcdir)/configure $(am__configure_deps) \
-	$(top_srcdir)/impl/config.h.in depcomp depcomp ltmain.sh \
-	ltmain.sh config.guess config.guess config.sub config.sub \
-	$(api_HEADERS)
+DIST_COMMON = config.guess config.sub $(srcdir)/Makefile.in \
+	$(srcdir)/Makefile.am $(top_srcdir)/configure \
+	$(am__configure_deps) $(top_srcdir)/impl/config.h.in depcomp \
+	ltmain.sh config.guess config.sub $(api_HEADERS)
 subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/hadoop_utils.m4 \
@@ -116,6 +115,7 @@ EGREP = @EGREP@
 EXEEXT = @EXEEXT@
 F77 = @F77@
 FFLAGS = @FFLAGS@
+GREP = @GREP@
 INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
@@ -140,12 +140,9 @@ SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 STRIP = @STRIP@
 VERSION = @VERSION@
-ac_ct_AR = @ac_ct_AR@
 ac_ct_CC = @ac_ct_CC@
 ac_ct_CXX = @ac_ct_CXX@
 ac_ct_F77 = @ac_ct_F77@
-ac_ct_RANLIB = @ac_ct_RANLIB@
-ac_ct_STRIP = @ac_ct_STRIP@
 am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
 am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
 am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
@@ -162,23 +159,30 @@ build_cpu = @build_cpu@
 build_os = @build_os@
 build_vendor = @build_vendor@
 datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
 exec_prefix = @exec_prefix@
 host = @host@
 host_alias = @host_alias@
 host_cpu = @host_cpu@
 host_os = @host_os@
 host_vendor = @host_vendor@
+htmldir = @htmldir@
 includedir = @includedir@
 infodir = @infodir@
 install_sh = @install_sh@
 libdir = @libdir@
 libexecdir = @libexecdir@
+localedir = @localedir@
 localstatedir = @localstatedir@
 mandir = @mandir@
 mkdir_p = @mkdir_p@
 oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
+psdir = @psdir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 sysconfdir = @sysconfdir@

File diff is too large to display
+ 198 - 312
src/c++/utils/aclocal.m4


File diff is too large to display
+ 176 - 315
src/c++/utils/configure


+ 2 - 0
src/c++/utils/m4/hadoop_utils.m4

@@ -51,6 +51,8 @@ AC_CHECK_HEADERS([pthread.h], [],
   AC_MSG_ERROR(Please check if you have installed the pthread library)) 
 AC_CHECK_LIB([pthread], [pthread_create], [], 
   AC_MSG_ERROR(Cannot find libpthread.so, please check))
+AC_CHECK_LIB([ssl], [HMAC_Init], [], 
+  AC_MSG_ERROR(Cannot find libssl.so, please check))
 ])
 
 # define a macro for using hadoop pipes
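
The added AC_CHECK_LIB line makes configure fail unless libssl provides HMAC_Init, presumably for the authentication support added to HadoopPipes.cc in this patch. Below is a minimal standalone C sketch (illustrative only, not from the patch) of the OpenSSL HMAC capability being probed; build with -lssl -lcrypto:

/* Illustrative only -- a standalone probe of the libssl HMAC capability
 * the configure check above requires. */
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  const char *key = "secret";
  const char *msg = "hello";
  unsigned char digest[EVP_MAX_MD_SIZE];
  unsigned int len = 0;

  /* One-shot HMAC-SHA1 over msg with the given key. */
  HMAC(EVP_sha1(), key, (int) strlen(key),
       (const unsigned char *) msg, strlen(msg), digest, &len);
  printf("HMAC-SHA1 produced a %u-byte digest\n", len);
  return 0;
}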

+ 127 - 6
src/contrib/build-contrib.xml

@@ -32,15 +32,23 @@
   <property name="hadoop.root" location="${root}/../../../"/>
   <property name="src.dir"  location="${root}/src/java"/>
   <property name="src.test" location="${root}/src/test"/>
+  <!-- Property added for contrib system tests -->
+  <property name="src.test.system" location="${root}/src/test/system"/>
+
   <property name="src.examples" location="${root}/src/examples"/>
 
   <available file="${src.examples}" type="dir" property="examples.available"/>
   <available file="${src.test}" type="dir" property="test.available"/>
 
+  <!-- Property added for contrib system tests -->
+  <available file="${src.test.system}" type="dir" 
+      property="test.system.available"/>
+ 
   <property name="conf.dir" location="${hadoop.root}/conf"/>
   <property name="test.junit.output.format" value="plain"/>
   <property name="test.output" value="no"/>
   <property name="test.timeout" value="900000"/>
+  <property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
   <property name="build.dir" location="${hadoop.root}/build/contrib/${name}"/>
   <property name="build.classes" location="${build.dir}/classes"/>
   <property name="build.test" location="${build.dir}/test"/>
@@ -58,6 +66,10 @@
 
   <fileset id="lib.jars" dir="${root}" includes="lib/*.jar"/>
 
+  <!-- Property added for contrib system tests -->
+  <property name="build.test.system" location="${build.dir}/system"/>
+  <property name="build.system.classes" 
+      location="${build.test.system}/classes"/>
 
    <!-- IVY properties set here -->
   <property name="ivy.dir" location="ivy" />
@@ -80,12 +92,14 @@
   <!-- the normal classpath -->
   <path id="contrib-classpath">
     <pathelement location="${build.classes}"/>
+    <pathelement location="${hadoop.root}/build/tools"/>
     <fileset refid="lib.jars"/>
     <pathelement location="${hadoop.root}/build/classes"/>
     <fileset dir="${hadoop.root}/lib">
       <include name="**/*.jar" />
     </fileset>
     <path refid="${ant.project.name}.common-classpath"/>
+    <pathelement path="${clover.jar}"/>
   </path>
 
   <!-- the unit test classpath -->
@@ -96,9 +110,40 @@
     <pathelement location="${conf.dir}"/>
     <pathelement location="${hadoop.root}/build"/>
     <pathelement location="${build.examples}"/>
+    <pathelement location="${hadoop.root}/build/examples"/>
     <path refid="contrib-classpath"/>
   </path>
 
+  <!-- The system test classpath -->
+  <path id="test.system.classpath">
+    <pathelement location="${hadoop.root}/src/contrib/${name}/src/test/system" />
+    <pathelement location="${build.test.system}" />
+    <pathelement location="${build.test.system}/classes"/>
+    <pathelement location="${build.examples}"/>
+    <pathelement location="${hadoop.root}/build-fi/system/classes" />
+    <pathelement location="${hadoop.root}/build-fi/system/test/classes" />
+    <pathelement location="${hadoop.root}/build-fi" />
+    <pathelement location="${hadoop.root}/build-fi/tools" />
+    <pathelement location="${hadoop.home}"/>
+    <pathelement location="${hadoop.conf.dir}"/>
+    <pathelement location="${hadoop.conf.dir.deployed}"/>
+    <pathelement location="${hadoop.root}/build"/>
+    <pathelement location="${hadoop.root}/build/examples"/>
+    <pathelement location="${hadoop.root}/build-fi/test/classes" />
+    <path refid="contrib-classpath"/>
+    <fileset dir="${hadoop.root}/src/test/lib">
+      <include name="**/*.jar" />
+      <exclude name="**/excluded/" />
+    </fileset>
+    <fileset dir="${hadoop.root}/build-fi/system">
+       <include name="**/*.jar" />
+       <exclude name="**/excluded/" />
+     </fileset>
+    <fileset dir="${hadoop.root}/build-fi/test/testjar">
+      <include name="**/*.jar" />
+      <exclude name="**/excluded/" />
+    </fileset>
+  </path>
 
   <!-- to be overridden by sub-projects -->
   <target name="check-contrib"/>
@@ -112,6 +157,9 @@
     <mkdir dir="${build.dir}"/>
     <mkdir dir="${build.classes}"/>
     <mkdir dir="${build.test}"/>
+    <!-- The two mkdir tasks below are added for contrib system tests -->
+    <mkdir dir="${build.test.system}"/>
+    <mkdir dir="${build.system.classes}"/> 
     <mkdir dir="${build.examples}"/>
     <mkdir dir="${hadoop.log.dir}"/>
     <antcall target="init-contrib"/>
@@ -160,12 +208,28 @@
      encoding="${build.encoding}"
      srcdir="${src.test}"
      includes="**/*.java"
+     excludes="system/**/*.java"
      destdir="${build.test}"
      debug="${javac.debug}">
     <classpath refid="test.classpath"/>
     </javac>
   </target>
   
+  <!-- ================================================================== -->
+  <!-- Compile system test code                                           -->
+  <!-- ================================================================== -->
+  <target name="compile-test-system" depends="compile-examples"
+     if="test.system.available">
+    <echo message="contrib: ${name}"/>
+    <javac
+       encoding="${build.encoding}"
+       srcdir="${src.test.system}"
+       includes="**/*.java"
+       destdir="${build.system.classes}"
+       debug="${javac.debug}">
+      <classpath refid="test.system.classpath"/>
+    </javac>
+  </target>
 
   <!-- ====================================================== -->
   <!-- Make a Hadoop contrib's jar                            -->
@@ -173,7 +237,7 @@
   <target name="jar" depends="compile" unless="skip.contrib">
     <echo message="contrib: ${name}"/>
     <jar
-      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
+      jarfile="${build.dir}/hadoop-${name}-${version}.jar"
       basedir="${build.classes}"      
     />
   </target>
@@ -185,7 +249,7 @@
   <target name="jar-examples" depends="compile-examples"
           if="examples.available" unless="skip.contrib">
     <echo message="contrib: ${name}"/>
-    <jar jarfile="${build.dir}/hadoop-${version}-${name}-examples.jar">
+    <jar jarfile="${build.dir}/hadoop-${name}-examples-${version}.jar">
       <fileset dir="${build.classes}">
       </fileset>
       <fileset dir="${build.examples}">
@@ -200,7 +264,7 @@
     <mkdir dir="${dist.dir}/contrib/${name}"/>
     <copy todir="${dist.dir}/contrib/${name}" includeEmptyDirs="false" flatten="true">
       <fileset dir="${build.dir}">
-        <include name="hadoop-${version}-${name}.jar" />
+        <include name="hadoop-${name}-${version}.jar" />
       </fileset>
     </copy>
   </target>
@@ -231,17 +295,74 @@
       <sysproperty key="fs.default.name" value="${fs.default.name}"/>
       <sysproperty key="hadoop.test.localoutputfile" value="${hadoop.test.localoutputfile}"/>
       <sysproperty key="hadoop.log.dir" value="${hadoop.log.dir}"/> 
+      <sysproperty key="taskcontroller-path" value="${taskcontroller-path}"/>
+      <sysproperty key="taskcontroller-ugi" value="${taskcontroller-ugi}"/>
       <classpath refid="test.classpath"/>
       <formatter type="${test.junit.output.format}" />
       <batchtest todir="${build.test}" unless="testcase">
         <fileset dir="${src.test}"
-                 includes="**/Test*.java" excludes="**/${test.exclude}.java" />
+                 includes="**/Test*.java" excludes="**/${test.exclude}.java, system/**/*.java" />
       </batchtest>
       <batchtest todir="${build.test}" if="testcase">
-        <fileset dir="${src.test}" includes="**/${testcase}.java"/>
+        <fileset dir="${src.test}" includes="**/${testcase}.java" excludes="system/**/*.java" />
       </batchtest>
     </junit>
-    <fail if="tests.failed">Tests failed!</fail>
+    <antcall target="checkfailure"/>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run system tests                                                   -->
+  <!-- ================================================================== -->
+  <target name="test-system" depends="compile, compile-test-system"
+     if="test.system.available">
+     <delete dir="${build.test.system}/extraconf"/>
+     <mkdir dir="${build.test.system}/extraconf"/>
+     <property name="test.src.dir" location="${hadoop.root}/src/test"/>
+     <property name="test.junit.printsummary" value="yes" />
+     <property name="test.junit.haltonfailure" value="no" />
+     <property name="test.junit.maxmemory" value="512m" />
+     <property name="test.junit.fork.mode" value="perTest" />
+     <property name="test.all.tests.file" value="${test.src.dir}/all-tests" />
+     <property name="test.build.dir" value="${hadoop.root}/build/test"/>
+     <property name="basedir" value="${hadoop.root}"/>
+     <property name="test.timeout" value="900000"/>
+     <property name="test.junit.output.format" value="plain"/>
+     <property name="test.tools.input.dir" value="${basedir}/src/test/tools/data"/>
+     <property name="c++.src" value="${basedir}/src/c++"/>
+     <property name="test.include" value="Test*"/>
+     <property name="c++.libhdfs.src" value="${c++.src}/libhdfs"/>
+     <property name="test.build.data" value="${build.test.system}/data"/>
+     <property name="test.cache.data" value="${build.test.system}/cache"/>
+     <property name="test.debug.data" value="${build.test.system}/debug"/>
+     <property name="test.log.dir" value="${build.test.system}/logs"/>
+     <patternset id="empty.exclude.list.id" />
+        <exec executable="sed" inputstring="${os.name}"
+            outputproperty="nonspace.os">
+          <arg value="s/ /_/g"/>
+        </exec>
+     <property name="build.platform"
+         value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
+     <property name="build.native" 
+         value="${hadoop.root}/build/native/${build.platform}"/>
+     <property name="lib.dir" value="${hadoop.root}/lib"/>
+     <property name="install.c++.examples"
+         value="${hadoop.root}/build/c++-examples/${build.platform}"/>
+    <condition property="tests.testcase">
+       <and>
+         <isset property="testcase" />
+       </and>
+    </condition>
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="test.system.classpath"
+                       test.dir="${build.test.system}"
+                       fileset.dir="${hadoop.root}/src/contrib/${name}/src/test/system"
+                       hadoop.conf.dir.deployed="${hadoop.conf.dir.deployed}">
+    </macro-test-runner>
+  </target>
+
+  <target name="checkfailure" if="tests.failed">
+    <touch file="${build.contrib.dir}/testsfailed"/>
+    <fail unless="continueOnFailure">Contrib Tests failed!</fail>
   </target>
 
   <!-- ================================================================== -->
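For orientation: the new compile-test-system and test-system targets above pick up contrib system tests from src/test/system, compile them into ${build.dir}/system/classes, and keep them out of the regular unit-test run via the system/**/*.java excludes. A minimal sketch of a test class that would match the Test* include follows; the package, class name and file location are hypothetical and not part of this patch.

// Hypothetical file: src/contrib/<name>/src/test/system/org/apache/hadoop/mapred/TestContribSystemSkeleton.java
package org.apache.hadoop.mapred;

import junit.framework.TestCase;

// Compiled by compile-test-system into ${build.dir}/system/classes and excluded
// from the plain "test" target by the system/**/*.java patterns added above.
public class TestContribSystemSkeleton extends TestCase {
  public void testPlaceholder() {
    // Placeholder assertion only; a real contrib system test would exercise a
    // deployed cluster located via hadoop.conf.dir.deployed.
    assertTrue(true);
  }
}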

+ 30 - 0
src/contrib/build.xml

@@ -45,15 +45,45 @@
   <!-- Test all the contribs.                               -->
   <!-- ====================================================== -->
   <target name="test">
+     <property name="hadoop.root" location="${root}/../../../"/>
+     <property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
+     <delete file="${build.contrib.dir}/testsfailed"/>
     <subant target="test">
+      <property name="continueOnFailure" value="true"/>
       <fileset dir="." includes="hdfsproxy/build.xml"/>
       <fileset dir="." includes="streaming/build.xml"/>
       <fileset dir="." includes="fairscheduler/build.xml"/>
       <fileset dir="." includes="capacity-scheduler/build.xml"/>
+      <fileset dir="." includes="gridmix/build.xml"/>
     </subant>
+     <available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
+     <fail if="testsfailed">Tests failed!</fail>
   </target>
   
   
+  <!-- ====================================================== -->
+  <!-- Test all the contrib system tests                     -->
+  <!-- ====================================================== -->
+  <target name="test-system-contrib">
+    <property name="hadoop.root" location="${root}/../../../"/>
+    <property name="build.contrib.dir" location="${hadoop.root}/build/contrib"/>
+    <delete file="${build.contrib.dir}/testsfailed"/>
+    <subant target="test-system">
+       <property name="continueOnFailure" value="true"/>
+       <property name="hadoop.home" value="${hadoop.home}"/>
+       <property name="hadoop.conf.dir" value="${hadoop.conf.dir}"/>
+       <property name="hadoop.conf.dir.deployed"
+           value="${hadoop.conf.dir.deployed}"/>
+       <fileset dir="." includes="hdfsproxy/build.xml"/>
+       <fileset dir="." includes="streaming/build.xml"/>
+       <fileset dir="." includes="fairscheduler/build.xml"/>
+       <fileset dir="." includes="capacity-scheduler/build.xml"/>
+       <fileset dir="." includes="gridmix/build.xml"/>
+    </subant>
+    <available file="${build.contrib.dir}/testsfailed" property="testsfailed"/>
+    <fail if="testsfailed">Tests failed!</fail>
+  </target>
+
   <!-- ====================================================== -->
   <!-- Clean all the contribs.                              -->
   <!-- ====================================================== -->
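The contrib test targets above are set up to keep going when one contrib's tests fail: each sub-build runs with continueOnFailure=true, a failing run touches the shared ${build.contrib.dir}/testsfailed marker, and the aggregating target fails once at the end if the marker exists. Below is a small Java analogue of that pattern, purely illustrative; the marker path and runTests stub are made up.

import java.io.File;
import java.io.IOException;

public class AggregateTestFailures {
  public static void main(String[] args) throws IOException {
    File marker = new File("build/contrib/testsfailed");     // hypothetical marker path
    marker.delete();                                         // like <delete file=.../> before the runs

    for (String contrib : new String[] {"streaming", "capacity-scheduler", "gridmix"}) {
      if (!runTests(contrib)) {                              // stand-in for <subant target="test">
        marker.getParentFile().mkdirs();
        marker.createNewFile();                              // like <touch> in the checkfailure target
      }
    }

    if (marker.exists()) {                                   // like <available/> + <fail if="testsfailed">
      throw new RuntimeException("Tests failed!");
    }
  }

  private static boolean runTests(String contrib) {
    return true;                                             // placeholder; real logic would fork the tests
  }
}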

+ 21 - 5
src/contrib/capacity-scheduler/ivy.xml

@@ -43,14 +43,30 @@
     <dependency org="org.mortbay.jetty"
       name="jetty"
       rev="${jetty.version}"
-      conf="common->master"/>
-    <dependency org="org.mortbay.jetty"
-      name="servlet-api-2.5"
-      rev="${servlet-api-2.5.version}"
-      conf="common->master"/> 
+      conf="common->default"/>
     <dependency org="commons-httpclient"
       name="commons-httpclient"
       rev="${commons-httpclient.version}"
       conf="common->master"/> 
+    <dependency org="commons-codec"
+      name="commons-codec"
+      rev="${commons-codec.version}"
+      conf="common->default"/>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-mapper-asl"
+      rev="${jackson.version}"
+      conf="common->default"/>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-core-asl"
+      rev="${jackson.version}"
+      conf="common->default"/>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
   </dependencies>
 </ivy-module>

+ 4 - 0
src/contrib/capacity-scheduler/ivy/libraries.properties

@@ -3,3 +3,7 @@
 
 #Please list the dependencies name with version if they are different from the ones 
 #listed in the global libraries.properties file (in alphabetical order)
+
+jackson.version=1.0.1
+commons-configuration.version=1.6
+commons-math.version=2.1
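The Jackson, commons-configuration and commons-math versions pinned here back the new capacity-scheduler dependencies declared in ivy.xml above, presumably for the new servlet and related code. The snippet below is only a minimal sketch of the Jackson 1.x (org.codehaus.jackson) API being pulled in; the QueueInfo bean and its fields are made up for illustration and do not come from this patch.

import java.io.StringWriter;
import org.codehaus.jackson.map.ObjectMapper;

public class QueueInfoJsonSketch {
  // Hypothetical bean; Jackson 1.x picks up public getters by default.
  public static class QueueInfo {
    private final String name;
    private final float capacityPercent;
    public QueueInfo(String name, float capacityPercent) {
      this.name = name;
      this.capacityPercent = capacityPercent;
    }
    public String getName() { return name; }
    public float getCapacityPercent() { return capacityPercent; }
  }

  public static void main(String[] args) throws Exception {
    StringWriter out = new StringWriter();
    // Serialize the bean with Jackson's ObjectMapper; writeValue(Writer, Object) is part of the 1.x API.
    new ObjectMapper().writeValue(out, new QueueInfo("default", 100f));
    System.out.println(out);  // e.g. {"name":"default","capacityPercent":100.0}
  }
}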

+ 234 - 40
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerConf.java

@@ -36,6 +36,8 @@ class CapacitySchedulerConf {
   
   private int defaultUlimitMinimum;
   
+  private float defaultUserLimitFactor;
+  
   private boolean defaultSupportPriority;
   
   private static final String QUEUE_CONF_PROPERTY_NAME_PREFIX = 
@@ -75,11 +77,20 @@ class CapacitySchedulerConf {
   static final String UPPER_LIMIT_ON_TASK_PMEM_PROPERTY =
     "mapred.capacity-scheduler.task.limit.maxpmem";
 
+
+  private static final String CAPACITY_PROPERTY = "capacity";
+
+  /**
+   * A maximum capacity defines a limit beyond which a queue
+   * cannot expand.
+   */
+  static final String MAX_CAPACITY_PROPERTY = "maximum-capacity";
+
   /**
    * The constant which defines the default initialization thread
    * polling interval, denoted in milliseconds.
    */
-  private static final int INITIALIZATION_THREAD_POLLING_INTERVAL = 5000;
+  private static final int INITIALIZATION_THREAD_POLLING_INTERVAL = 3000;
 
   /**
    * The constant which defines the maximum number of worker threads to be
@@ -89,12 +100,21 @@ class CapacitySchedulerConf {
 
   private Configuration rmConf;
 
-  private int defaultMaxJobsPerUsersToInitialize;
+  private int defaultInitToAcceptJobsFactor;
+  private int defaultMaxActiveTasksPerUserToInitialize;
+  private int defaultMaxActiveTasksPerQueueToInitialize;
+  
+  static final String MAX_SYSTEM_JOBS_KEY = 
+    "mapred.capacity-scheduler.maximum-system-jobs";
+  
+  static final int DEFAULT_MAX_SYSTEM_JOBS = 5000;
+  
+  static final int DEFAULT_MAX_TASKS_TO_SCHEDULE_AFTER_OFFSWITCH = 0;
   
   /**
-   * Create a new ResourceManagerConf.
+   * Create a new Capacity scheduler conf.
    * This method reads from the default configuration file mentioned in
-   * {@link RM_CONF_FILE}, that must be present in the classpath of the
+   * {@link SCHEDULER_CONF_FILE}, which must be present in the classpath of the
    * application.
    */
   public CapacitySchedulerConf() {
@@ -104,7 +124,7 @@ class CapacitySchedulerConf {
   }
 
   /**
-   * Create a new ResourceManagerConf reading the specified configuration
+   * Create a new Capacity scheduler conf reading the specified configuration
    * file.
    * 
    * @param configFile {@link Path} to the configuration file containing
@@ -121,13 +141,25 @@ class CapacitySchedulerConf {
    * which is used by the Capacity Scheduler.
    */
   private void initializeDefaults() {
-    defaultUlimitMinimum = rmConf.getInt(
-        "mapred.capacity-scheduler.default-minimum-user-limit-percent", 100);
+    defaultUlimitMinimum = 
+      rmConf.getInt(
+          "mapred.capacity-scheduler.default-minimum-user-limit-percent", 100);
+    defaultUserLimitFactor = 
+      rmConf.getFloat("mapred.capacity-scheduler.default-user-limit-factor", 
+          1.0f);
     defaultSupportPriority = rmConf.getBoolean(
         "mapred.capacity-scheduler.default-supports-priority", false);
-    defaultMaxJobsPerUsersToInitialize = rmConf.getInt(
-        "mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user",
-        2);
+    defaultMaxActiveTasksPerQueueToInitialize = 
+      rmConf.getInt(
+          "mapred.capacity-scheduler.default-maximum-active-tasks-per-queue", 
+          200000);
+    defaultMaxActiveTasksPerUserToInitialize = 
+      rmConf.getInt(
+          "mapred.capacity-scheduler.default-maximum-active-tasks-per-user", 
+          100000);
+    defaultInitToAcceptJobsFactor =
+      rmConf.getInt("mapred.capacity-scheduler.default-init-accept-jobs-factor", 
+          10);
   }
   
   /**
@@ -151,16 +183,15 @@ class CapacitySchedulerConf {
     //In case of both capacity and default capacity not configured.
     //Last check is if the configuration is specified and is marked as
     //negative we throw exception
-    String raw = rmConf.getRaw(toFullPropertyName(queue, 
-        "capacity"));
+    String raw = rmConf.getRaw(toFullPropertyName(queue, CAPACITY_PROPERTY));
     if(raw == null) {
       return -1;
     }
-    float result = rmConf.getFloat(toFullPropertyName(queue, 
-                                   "capacity"), 
-                                   -1);
+    float result = rmConf.getFloat(
+      toFullPropertyName(queue, CAPACITY_PROPERTY), -1);
     if (result < 0.0 || result > 100.0) {
-      throw new IllegalArgumentException("Illegal capacity for queue " + queue +
+      throw new IllegalArgumentException(
+        "Illegal capacity for queue " + queue +
                                          " of " + result);
     }
     return result;
@@ -173,7 +204,53 @@ class CapacitySchedulerConf {
    * @param capacity percent of the cluster for the queue.
    */
   public void setCapacity(String queue,float capacity) {
-    rmConf.setFloat(toFullPropertyName(queue, "capacity"),capacity);
+    rmConf.setFloat(toFullPropertyName(queue, CAPACITY_PROPERTY),capacity);
+  }
+
+  /**
+   * Return the maximum percentage of the cluster capacity that can be used by
+   * the given queue.
+   * This percentage defines a limit beyond which a
+   * queue cannot use the capacity of the cluster.
+   * This provides a means to limit how much excess capacity a
+   * queue can use. By default, there is no limit.
+   *
+   * The maximum-capacity of a queue must be
+   * greater than or equal to its capacity.
+   *
+   * @param queue name of the queue.
+   * @return maximum-capacity for the given queue
+   */
+  public float getMaxCapacity(String queue) {
+    float result = rmConf.getFloat(
+      toFullPropertyName(queue, MAX_CAPACITY_PROPERTY), -1);
+
+    //if result is 0 or less than 0 set it to -1
+    result = (result <= 0) ? -1 : result;
+
+    if (result > 100.0) {
+      throw new IllegalArgumentException(
+        "Illegal " + MAX_CAPACITY_PROPERTY +
+          " for queue " + queue + " of " + result);
+    }
+
+    if ((result != -1) && (result < getCapacity(queue))) {
+      throw new IllegalArgumentException(
+        MAX_CAPACITY_PROPERTY + " " + result +
+          " for a queue should be greater than or equal to capacity ");
+    }
+    return result;
+  }
+
+  /**
+   * Sets the maxCapacity of the given queue.
+   *
+   * @param queue name of the queue
+   * @param maxCapacity percent of the cluster for the queue.
+   */
+  public void setMaxCapacity(String queue, float maxCapacity) {
+    rmConf.setFloat(
+        toFullPropertyName(queue, MAX_CAPACITY_PROPERTY), maxCapacity);
   }
   
   /**
@@ -239,6 +316,32 @@ class CapacitySchedulerConf {
                     value);
   }
   
+  /**
+   * Get the factor of queue capacity above which a single user in a queue
+   * can consume resources.
+   * 
+   * @param queue queue name
+   * @return factor of queue capacity above which a single user in a queue
+   *         can consume resources
+   */
+  public float getUserLimitFactor(String queue) {
+    return rmConf.getFloat(toFullPropertyName(queue, "user-limit-factor"), 
+        defaultUserLimitFactor);
+  }
+  
+  /**
+   * Set the factor of queue capacity above which a single user in a queue
+   * can consume resources.
+   * 
+   * @param queue queue name
+   * @param userLimitFactor factor of queue capacity above which a single user 
+   *                        in a queue can consume resources
+   */
+  public void setUserLimitFactor(String queue, float userLimitFactor) {
+    rmConf.setFloat(toFullPropertyName(queue, "user-limit-factor"), 
+        userLimitFactor);
+  }
+  
   /**
    * Reload configuration by clearing the information read from the 
    * underlying configuration file.
@@ -253,38 +356,81 @@ class CapacitySchedulerConf {
       return QUEUE_CONF_PROPERTY_NAME_PREFIX + queue + "." + property;
   }
 
+  public int getMaxSystemJobs() {
+    int maxSystemJobs = 
+      rmConf.getInt(MAX_SYSTEM_JOBS_KEY, DEFAULT_MAX_SYSTEM_JOBS);
+    if (maxSystemJobs <= 0) {
+      throw new IllegalArgumentException("Invalid maximum system jobs: " + 
+          maxSystemJobs);
+    }
+    
+    return maxSystemJobs;
+  }
+
+  public void setMaxSystemJobs(int maxSystemJobs) {
+    rmConf.setInt(MAX_SYSTEM_JOBS_KEY, maxSystemJobs);
+  }
+  
+  public int getInitToAcceptJobsFactor(String queue) {
+    int initToAcceptFactor = 
+      rmConf.getInt(toFullPropertyName(queue, "init-accept-jobs-factor"), 
+          defaultInitToAcceptJobsFactor);
+    if (initToAcceptFactor <= 0) {
+      throw new IllegalArgumentException(
+          "Invalid init-accept-jobs-factor configuration " + initToAcceptFactor);
+    }
+    return initToAcceptFactor;
+  }
+  
+  public void setInitToAcceptJobsFactor(String queue, int initToAcceptFactor) {
+    rmConf.setInt(toFullPropertyName(queue, "init-accept-jobs-factor"), 
+        initToAcceptFactor);
+  }
+  
   /**
-   * Gets the maximum number of jobs which are allowed to initialize in the
-   * job queue.
+   * Get the maximum active tasks per queue to be initialized.
    * 
-   * @param queue queue name.
-   * @return maximum number of jobs allowed to be initialized per user.
-   * @throws IllegalArgumentException if maximum number of users is negative
-   * or zero.
+   * @param queue queue name
    */
-  public int getMaxJobsPerUserToInitialize(String queue) {
-    int maxJobsPerUser = rmConf.getInt(toFullPropertyName(queue,
-        "maximum-initialized-jobs-per-user"), 
-        defaultMaxJobsPerUsersToInitialize);
-    if(maxJobsPerUser <= 0) {
-      throw new IllegalArgumentException(
-          "Invalid maximum jobs per user configuration " + maxJobsPerUser);
-    }
-    return maxJobsPerUser;
+  public int getMaxInitializedActiveTasks(String queue) {
+    return rmConf.getInt(toFullPropertyName(queue, 
+                                            "maximum-initialized-active-tasks"), 
+                         defaultMaxActiveTasksPerQueueToInitialize);
   }
   
   /**
-   * Sets the maximum number of jobs which are allowed to be initialized 
-   * for a user in the queue.
+   * Set the maximum active tasks per queue to be initialized.
    * 
-   * @param queue queue name.
-   * @param value maximum number of jobs allowed to be initialized per user.
+   * @param queue queue name
+   * @param value maximum active tasks
    */
-  public void setMaxJobsPerUserToInitialize(String queue, int value) {
-    rmConf.setInt(toFullPropertyName(queue, 
-        "maximum-initialized-jobs-per-user"), value);
+  public void setMaxInitializedActiveTasks(String queue, int value) {
+    rmConf.setInt(toFullPropertyName(queue, "maximum-initialized-active-tasks"), 
+                  value);
   }
-
+  
+  /**
+   * Get the maximum active tasks per-user, per-queue to be initialized.
+   * 
+   * @param queue queue name
+   */
+  public int getMaxInitializedActiveTasksPerUser(String queue) {
+    return rmConf.getInt(toFullPropertyName(queue, 
+                                            "maximum-initialized-active-tasks-per-user"), 
+                         defaultMaxActiveTasksPerUserToInitialize);
+  }
+  
+  /**
+   * Set the maximum active tasks per-user, per-queue to be initialized.
+   * 
+   * @param queue queue name
+   * @param value maximum active tasks
+   */
+  public void setMaxInitializedActiveTasksPerUser(String queue, int value) {
+    rmConf.setInt(toFullPropertyName(queue, "maximum-initialized-active-tasks-per-user"), 
+                  value);
+  }
+  
   /**
    * Amount of time in milliseconds which poller thread and initialization
    * thread would sleep before looking at the queued jobs.
@@ -357,4 +503,52 @@ class CapacitySchedulerConf {
     rmConf.setInt(
         "mapred.capacity-scheduler.init-worker-threads", poolSize);
   }
+  
+  /**
+   * Get the maximum number of tasks which can be scheduled in a heartbeat.
+   * @return the maximum number of tasks which can be scheduled in a heartbeat
+   */
+  public int getMaxTasksPerHeartbeat() {
+    return rmConf.getInt(
+        "mapred.capacity-scheduler.maximum-tasks-per-heartbeat", 
+        Short.MAX_VALUE);
+  }
+
+  /**
+   * Set the maximum number of tasks which can be scheduled in a heartbeat
+   * @param maxTasksPerHeartbeat the maximum number of tasks which can be 
+   *                             scheduled in a heartbeat
+   */
+  public void setMaxTasksPerHeartbeat(int maxTasksPerHeartbeat) {
+    rmConf.setInt("mapred.capacity-scheduler.maximum-tasks-per-heartbeat", 
+        maxTasksPerHeartbeat);
+  }
+  
+  /**
+   * Get the maximum number of tasks to schedule, per heartbeat, after an
+   * off-switch task has been assigned.
+   * 
+   * @return the maximum number of tasks to schedule, per heartbeat, after an
+   *         off-switch task has been assigned
+   */
+  public int getMaxTasksToAssignAfterOffSwitch() {
+    return rmConf.getInt(
+        "mapred.capacity-scheduler.maximum-tasks-after-offswitch", 
+        DEFAULT_MAX_TASKS_TO_SCHEDULE_AFTER_OFFSWITCH);
+  }
+  
+  /**
+   * Set the maximum number of tasks to schedule, per heartbeat, after an
+   * off-switch task has been assigned.
+   * 
+   * @param maxTasksToAssignAfterOffSwitch the maximum number of tasks to 
+   *                                       schedule, per heartbeat, after an
+   *                                       off-switch task has been assigned
+   */
+  public void setMaxTasksToAssignAfterOffSwitch(
+      int maxTasksToAssignAfterOffSwitch) {
+    rmConf.setInt(
+        "mapred.capacity-scheduler.maximum-tasks-after-offswitch", 
+        maxTasksToAssignAfterOffSwitch);
+  }
 }
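Taken together, the new properties bound how many jobs a queue will accept and initialize: maximum-system-jobs is split across queues by capacity, narrowed per user by the user limit, and scaled up to an acceptance limit by init-accept-jobs-factor. A worked sketch of that arithmetic follows; it mirrors the computation in CapacitySchedulerQueue.initializeQueue below, and the numbers are illustrative only.

public class QueueLimitArithmetic {
  public static void main(String[] args) {
    int maxSystemJobs = 5000;      // mapred.capacity-scheduler.maximum-system-jobs (default)
    float capacityPercent = 10f;   // this queue's capacity
    int ulMin = 25;                // minimum-user-limit-percent
    int initToAcceptFactor = 10;   // init-accept-jobs-factor (default)

    int maxJobsToInit = (int) Math.ceil(maxSystemJobs * capacityPercent / 100.0);
    int maxJobsPerUserToInit =
        (int) Math.ceil(maxSystemJobs * capacityPercent / 100.0 * ulMin / 100.0);
    int maxJobsToAccept = maxJobsToInit * initToAcceptFactor;
    int maxJobsPerUserToAccept = maxJobsPerUserToInit * initToAcceptFactor;

    System.out.println("init=" + maxJobsToInit                // 500
        + " initPerUser=" + maxJobsPerUserToInit              // 125
        + " accept=" + maxJobsToAccept                        // 5000
        + " acceptPerUser=" + maxJobsPerUserToAccept);        // 1250
  }
}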

+ 1340 - 0
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerQueue.java

@@ -0,0 +1,1340 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapred.CapacityTaskScheduler.TaskSchedulingMgr;
+import org.apache.hadoop.mapred.JobQueueJobInProgressListener.JobSchedulingInfo;
+
+
+/***********************************************************************
+ * Keeping track of scheduling information for queues
+ * 
+ * We need to maintain scheduling information relevant to a queue (its 
+ * name, capacity, etc), along with information specific to 
+ * each kind of task, Map or Reduce (num of running tasks, pending 
+ * tasks etc). 
+ * 
+ * This scheduling information is used to decide how to allocate
+ * tasks, redistribute capacity, etc.
+ *  
+ * A QueueSchedulingInfo (QSI) object represents scheduling information for
+ * a queue. A TaskSchedulingInfo (TSI) object represents scheduling
+ * information for a particular kind of task (Map or Reduce).
+ *   
+ **********************************************************************/
+class CapacitySchedulerQueue {
+  
+  static final Log LOG = LogFactory.getLog(CapacityTaskScheduler.class);
+  
+  private static class SlotsUsage {
+    /** 
+     * the actual capacity, which depends on how many slots are available
+     * in the cluster at any given time. 
+     */
+    private int capacity = 0;
+    // number of running tasks
+    int numRunningTasks = 0;
+    // number of slots occupied by running tasks
+    int numSlotsOccupied = 0;
+  
+    // the actual maximum capacity, which depends on how many slots are
+    // available in the cluster at any given time.
+    private int maxCapacity = -1;
+  
+    // Active users
+    Set<String> users = new HashSet<String>();
+    
+    /**
+     * for each user, we need to keep track of number of slots occupied by
+     * running tasks
+     */
+    Map<String, Integer> numSlotsOccupiedByUser = 
+      new HashMap<String, Integer>();
+  
+    /**
+     * reset the variables associated with tasks
+     */
+    void reset() {
+      numRunningTasks = 0;
+      numSlotsOccupied = 0;
+      users.clear();
+      numSlotsOccupiedByUser.clear();
+    }
+  
+  
+    /**
+     * Returns the actual capacity.
+     *
+     * @return the actual capacity, in slots
+     */
+    int getCapacity() {
+      return capacity;
+    }
+  
+    /**
+     * Mutator method for capacity
+     *
+     * @param capacity
+     */
+    void setCapacity(int capacity) {
+        this.capacity = capacity;
+    }
+  
+    /**
+     * @return the numRunningTasks
+     */
+    int getNumRunningTasks() {
+      return numRunningTasks;
+    }
+  
+    /**
+     * @return the numSlotsOccupied
+     */
+    int getNumSlotsOccupied() {
+      return numSlotsOccupied;
+    }
+  
+    /**
+     * @return number of active users
+     */
+    int getNumActiveUsers() {
+      return users.size();
+    }
+    
+    /**
+     * return information about the tasks
+     */
+    @Override
+    public String toString() {
+      float occupiedSlotsAsPercent =
+          getCapacity() != 0 ?
+            ((float) numSlotsOccupied * 100 / getCapacity()) : 0;
+      StringBuffer sb = new StringBuffer();
+      
+      sb.append("Capacity: " + capacity + " slots\n");
+      
+      if(getMaxCapacity() >= 0) {
+        sb.append("Maximum capacity: " + getMaxCapacity() +" slots\n");
+      }
+      sb.append(String.format("Used capacity: %d (%.1f%% of Capacity)\n",
+          Integer.valueOf(numSlotsOccupied), Float
+              .valueOf(occupiedSlotsAsPercent)));
+      sb.append(String.format("Running tasks: %d\n", Integer
+          .valueOf(numRunningTasks)));
+      // include info on active users
+      if (numSlotsOccupied != 0) {
+        sb.append("Active users:\n");
+        for (Map.Entry<String, Integer> entry : numSlotsOccupiedByUser
+            .entrySet()) {
+          if ((entry.getValue() == null) || (entry.getValue().intValue() <= 0)) {
+            // user has no tasks running
+            continue;
+          }
+          sb.append("User '" + entry.getKey() + "': ");
+          int numSlotsOccupiedByThisUser = entry.getValue().intValue();
+          float p =
+              (float) numSlotsOccupiedByThisUser * 100 / numSlotsOccupied;
+          sb.append(String.format("%d (%.1f%% of used capacity)\n", Long
+              .valueOf(numSlotsOccupiedByThisUser), Float.valueOf(p)));
+        }
+      }
+      return sb.toString();
+    }
+  
+    int getMaxCapacity() {
+      return maxCapacity;
+    }
+  
+    void setMaxCapacity(int maxCapacity) {
+      this.maxCapacity = maxCapacity;
+    }
+    
+    int getNumSlotsOccupiedByUser(String user) {
+      Integer slots = numSlotsOccupiedByUser.get(user);
+      return (slots != null) ? slots : 0;
+    }
+
+
+    void updateCapacities(float capacityPercent, float maxCapacityPercent, 
+                          int clusterCapacity) {
+      //compute new capacity
+      setCapacity((int)(capacityPercent*clusterCapacity/100));
+
+      //compute new max map capacities
+      if(maxCapacityPercent > 0) {
+        setMaxCapacity((int)(maxCapacityPercent*clusterCapacity / 100));
+      }
+    }
+    
+    void updateSlotsUsage(String user, int pendingTasks, int numRunningTasks, int numSlotsOccupied) {
+      this.numRunningTasks += numRunningTasks;
+      this.numSlotsOccupied += numSlotsOccupied;
+      Integer i = this.numSlotsOccupiedByUser.get(user);
+      int slots = numSlotsOccupied + ((i == null) ? 0 : i.intValue());
+      this.numSlotsOccupiedByUser.put(user, slots);
+      if (pendingTasks > 0) {
+        users.add(user);
+      }
+    }
+  }
+
+  // Queue name
+  final String queueName;
+
+  /**
+   * capacity(%) is set in the config
+   */
+  volatile float capacityPercent = 0;
+  
+  
+  /**
+   * maxCapacityPercent(%) is set in config as
+   * mapred.capacity-scheduler.<queue-name>.maximum-capacity
+   * maximum-capacity percent defines a limit beyond which a queue
+   * cannot expand. Remember this limit is dynamic and changes w.r.t
+   * cluster size.
+   */
+  volatile float maxCapacityPercent = -1;
+  
+  /**
+   * To handle user limits, we need to know how many users have jobs in
+   * the queue.
+   */
+  Map<String, Integer> numJobsByUser = new HashMap<String, Integer>();
+    
+  /**
+   * min value of user limit (same for all users)
+   */
+  volatile int ulMin;
+
+  /**
+   * The factor of queue-capacity above which a single user can consume
+   * queue resources.
+   */
+  volatile float ulMinFactor;
+  
+  /**
+   * We keep a SlotsUsage object for each kind of task we support
+   */
+  CapacitySchedulerQueue.SlotsUsage mapSlots;
+  CapacitySchedulerQueue.SlotsUsage reduceSlots;
+  
+  /** 
+   * Whether the queue supports priorities.
+   */
+  final boolean supportsPriorities;
+  
+  /**
+   * Information required to track job, user, queue limits 
+   */
+  
+  Map<JobSchedulingInfo, JobInProgress> waitingJobs; // for waiting jobs
+  Map<JobSchedulingInfo, JobInProgress> initializingJobs; // for init'ing jobs
+  Map<JobSchedulingInfo, JobInProgress> runningJobs; // for running jobs
+  
+  /**
+   *  Active tasks in the queue
+   */
+  int activeTasks = 0;
+  
+  /**
+   *  Users in the queue
+   */
+  Map<String, UserInfo> users = new HashMap<String, UserInfo>();
+
+  /**
+   * Comparator for ordering jobs in this queue
+   */
+  public Comparator<JobSchedulingInfo> comparator;
+  
+  int maxJobsToInit;
+  int maxJobsToAccept;
+  int maxJobsPerUserToInit;
+  int maxJobsPerUserToAccept;
+  int maxActiveTasks;
+  int maxActiveTasksPerUser;
+
+  // comparator for jobs in queues that don't support priorities
+  private static final Comparator<JobSchedulingInfo> STARTTIME_JOB_COMPARATOR
+    = new Comparator<JobSchedulingInfo>() {
+    public int compare(JobSchedulingInfo o1, JobSchedulingInfo o2) {
+      // the job that started earlier wins
+      if (o1.getStartTime() < o2.getStartTime()) {
+        return -1;
+      } else {
+        return (o1.getStartTime() == o2.getStartTime() 
+                ? o1.getJobID().compareTo(o2.getJobID()) 
+                : 1);
+      }
+    }
+  };
+
+  public CapacitySchedulerQueue(String queueName, CapacitySchedulerConf conf) {
+    this.queueName = queueName;
+
+    // Do not allow changes to 'supportsPriorities'
+    supportsPriorities = conf.isPrioritySupported(queueName);
+
+    initializeQueue(conf);
+
+    if (supportsPriorities) {
+      // use the default priority-aware comparator
+      comparator = JobQueueJobInProgressListener.FIFO_JOB_QUEUE_COMPARATOR;
+    }
+    else {
+      comparator = STARTTIME_JOB_COMPARATOR;
+    }
+    this.waitingJobs = 
+      new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+    this.initializingJobs =
+      new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+    this.runningJobs = 
+      new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+
+    this.mapSlots = new SlotsUsage();
+    this.reduceSlots = new SlotsUsage();    
+  }
+  
+  synchronized void init(float capacityPercent, float maxCapacityPercent,
+      int ulMin, float ulMinFactor,
+      int maxJobsToInit, int maxJobsPerUserToInit,
+      int maxActiveTasks, int maxActiveTasksPerUser,
+      int maxJobsToAccept, int maxJobsPerUserToAccept) {
+    this.capacityPercent = capacityPercent;
+    this.maxCapacityPercent = maxCapacityPercent;
+    this.ulMin = ulMin;
+    this.ulMinFactor = ulMinFactor;
+    
+    this.maxJobsToInit = maxJobsToInit;
+    this.maxJobsPerUserToInit = maxJobsPerUserToInit; 
+    this.maxActiveTasks = maxActiveTasks;
+    this.maxActiveTasksPerUser = maxActiveTasksPerUser; 
+    this.maxJobsToAccept = maxJobsToAccept;
+    this.maxJobsPerUserToAccept = maxJobsPerUserToAccept;
+    
+    LOG.info("Initializing '" + queueName + "' queue with " +
+        "cap=" + capacityPercent + ", " +
+        "maxCap=" + maxCapacityPercent + ", " +
+        "ulMin=" + ulMin + ", " +
+        "ulMinFactor=" + ulMinFactor + ", " +
+        "supportsPriorities=" + supportsPriorities + ", " +
+        "maxJobsToInit=" + maxJobsToInit + ", " +
+        "maxJobsToAccept=" + maxJobsToAccept + ", " +
+        "maxActiveTasks=" + maxActiveTasks + ", " +
+        "maxJobsPerUserToInit=" + maxJobsPerUserToInit + ", " +
+        "maxJobsPerUserToAccept=" + maxJobsPerUserToAccept + ", " +
+        "maxActiveTasksPerUser=" + maxActiveTasksPerUser
+    );
+    
+    // Sanity checks
+    if (maxActiveTasks < maxActiveTasksPerUser ||
+        maxJobsToInit < maxJobsPerUserToInit || 
+        maxJobsToAccept < maxJobsPerUserToAccept) {
+      throw new IllegalArgumentException("Illegal queue configuration for " +
+      		"queue '" + queueName + "'");
+    }
+  }
+  
+  synchronized void initializeQueue(CapacitySchedulerQueue other) {
+    init(other.capacityPercent, other.maxCapacityPercent, 
+        other.ulMin, other.ulMinFactor, 
+        other.maxJobsToInit, other.maxJobsPerUserToInit, 
+        other.maxActiveTasks, other.maxActiveTasksPerUser, 
+        other.maxJobsToAccept, other.maxJobsPerUserToAccept);
+  }
+  
+  synchronized void initializeQueue(CapacitySchedulerConf conf) {
+    float capacityPercent = conf.getCapacity(queueName);
+    float maxCapacityPercent = conf.getMaxCapacity(queueName);
+    int ulMin = conf.getMinimumUserLimitPercent(queueName);
+    float ulMinFactor = conf.getUserLimitFactor(queueName);
+    
+    int maxSystemJobs = conf.getMaxSystemJobs();
+    int maxJobsToInit = (int)Math.ceil(maxSystemJobs * capacityPercent/100.0);
+    int maxJobsPerUserToInit = 
+      (int)Math.ceil(maxSystemJobs * capacityPercent/100.0 * ulMin/100.0);
+    int maxActiveTasks = conf.getMaxInitializedActiveTasks(queueName);
+    int maxActiveTasksPerUser = 
+      conf.getMaxInitializedActiveTasksPerUser(queueName);
+
+    int jobInitToAcceptFactor = conf.getInitToAcceptJobsFactor(queueName);
+    int maxJobsToAccept = maxJobsToInit * jobInitToAcceptFactor;
+    int maxJobsPerUserToAccept = maxJobsPerUserToInit * jobInitToAcceptFactor;
+    
+    init(capacityPercent, maxCapacityPercent, 
+        ulMin, ulMinFactor, 
+        maxJobsToInit, maxJobsPerUserToInit, 
+        maxActiveTasks, maxActiveTasksPerUser, 
+        maxJobsToAccept, maxJobsPerUserToAccept);
+  }
+
+  /**
+   * @return the queueName
+   */
+  String getQueueName() {
+    return queueName;
+  }
+
+  /**
+   * @return the capacityPercent
+   */
+  float getCapacityPercent() {
+    return capacityPercent;
+  }
+
+  /**
+   * reset the variables associated with tasks
+   */
+  void resetSlotsUsage(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      mapSlots.reset();
+    } else if (taskType == TaskType.REDUCE) {
+      reduceSlots.reset();
+    } else {    
+      throw new IllegalArgumentException("Illegal taskType=" + taskType);
+    }
+  }
+
+
+  /**
+   * Returns the actual capacity in terms of slots for the <code>taskType</code>.
+   * @param taskType
+   * @return actual capacity in terms of slots for the <code>taskType</code>
+   */
+  int getCapacity(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getCapacity();
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getCapacity();
+    }
+
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+
+  /**
+   * Get the number of running tasks of the given <code>taskType</code>.
+   * @param taskType
+   * @return the number of running tasks of the given <code>taskType</code>
+   */
+  int getNumRunningTasks(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getNumRunningTasks();
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getNumRunningTasks();
+    }
+    
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+
+  /**
+   * Get number of slots occupied of the <code>taskType</code>.
+   * @param taskType
+   * @return number of slots occupied of the <code>taskType</code>
+   */
+  int getNumSlotsOccupied(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getNumSlotsOccupied();
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getNumSlotsOccupied();
+    }
+    
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+
+  /**
+   * Get maximum number of slots for the <code>taskType</code>.
+   * @param taskType
+   * @return maximum number of slots for the <code>taskType</code>
+   */
+  int getMaxCapacity(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getMaxCapacity();
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getMaxCapacity();
+    }
+    
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+
+  /**
+   * Get number of slots occupied by a <code>user</code> of 
+   * <code>taskType</code>.
+   * @param user
+   * @param taskType
+   * @return number of slots occupied by a <code>user</code> of 
+   *         <code>taskType</code>
+   */
+  int getNumSlotsOccupiedByUser(String user, TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getNumSlotsOccupiedByUser(user);
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getNumSlotsOccupiedByUser(user);
+    }
+    
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+  
+  int getNumActiveUsersByTaskType(TaskType taskType) {
+    if (taskType == TaskType.MAP) {
+      return mapSlots.getNumActiveUsers();
+    } else if (taskType == TaskType.REDUCE) {
+      return reduceSlots.getNumActiveUsers();
+    }
+    
+    throw new IllegalArgumentException("Illegal taskType=" + taskType);
+  }
+  
+  /**
+   * A new job is added to the queue.
+   * @param job
+   */
+  void jobAdded(JobInProgress job) {
+    // update user-specific info
+    String user = job.getProfile().getUser();
+    
+    Integer i = numJobsByUser.get(user);
+    if (null == i) {
+      i = 1;
+      // set the count for running tasks to 0
+      mapSlots.numSlotsOccupiedByUser.put(user, 0);
+      reduceSlots.numSlotsOccupiedByUser.put(user, 0);
+    }
+    else {
+      i++;
+    }
+    numJobsByUser.put(user, i);
+  }
+  
+  int getNumJobsByUser(String user) {
+    Integer numJobs = numJobsByUser.get(user);
+    return (numJobs != null) ? numJobs : 0;
+  }
+  
+  /**
+   * A job from the queue has completed.
+   * @param job
+   */
+  void jobCompleted(JobInProgress job) {
+    String user = job.getProfile().getUser();
+    // update numJobsByUser
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Job to be removed for user " + user);
+    }
+    Integer i = numJobsByUser.get(job.getProfile().getUser());
+    i--;  // i should never be null!
+    if (0 == i.intValue()) {
+      numJobsByUser.remove(user);
+      // remove job footprint from our TSIs
+      mapSlots.numSlotsOccupiedByUser.remove(user);
+      reduceSlots.numSlotsOccupiedByUser.remove(user);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("No more jobs for user, number of users = " + 
+            numJobsByUser.size());
+      }
+    }
+    else {
+      numJobsByUser.put(user, i);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("User still has " + i + " jobs, number of users = "
+                + numJobsByUser.size());
+      }
+    }
+  }
+  
+  /**
+   * Update queue usage.
+   * @param type
+   * @param user
+   * @param numRunningTasks
+   * @param numSlotsOccupied
+   */
+  void update(TaskType type, JobInProgress job, String user, 
+      int numRunningTasks, int numSlotsOccupied) {
+    if (type == TaskType.MAP) {
+      mapSlots.updateSlotsUsage(user, job.pendingMaps(), 
+          numRunningTasks, numSlotsOccupied);
+    } else if (type == TaskType.REDUCE) {
+      reduceSlots.updateSlotsUsage(user, job.pendingReduces(), 
+          numRunningTasks, numSlotsOccupied);
+    }
+  }
+  
+  /**
+   * Update queue usage across all running jobs.
+   * @param mapClusterCapacity
+   * @param reduceClusterCapacity
+   * @param mapScheduler
+   * @param reduceScheduler
+   */
+  void updateAll(int mapClusterCapacity, int reduceClusterCapacity, 
+      TaskSchedulingMgr mapScheduler, TaskSchedulingMgr reduceScheduler) {
+   // Compute new capacities for maps and reduces
+    mapSlots.updateCapacities(capacityPercent, maxCapacityPercent, 
+        mapClusterCapacity);
+    reduceSlots.updateCapacities(capacityPercent, maxCapacityPercent, 
+        reduceClusterCapacity);
+
+    // reset running/pending tasks, tasks per user
+    resetSlotsUsage(TaskType.MAP);
+    resetSlotsUsage(TaskType.REDUCE);
+    
+    Collection<JobInProgress> jobs = getRunningJobs(); // Safe to iterate since
+                                                       // we get a copy here
+    for (JobInProgress j : jobs) {
+      if (j.getStatus().getRunState() != JobStatus.RUNNING) {
+        continue;
+      }
+
+      int numMapsRunningForThisJob = mapScheduler.getRunningTasks(j);
+      int numReducesRunningForThisJob = reduceScheduler.getRunningTasks(j);
+      int numRunningMapSlots = 
+        numMapsRunningForThisJob * mapScheduler.getSlotsPerTask(j);
+      int numRunningReduceSlots =
+        numReducesRunningForThisJob * reduceScheduler.getSlotsPerTask(j);
+      int numMapSlotsForThisJob = mapScheduler.getSlotsOccupied(j);
+      int numReduceSlotsForThisJob = reduceScheduler.getSlotsOccupied(j);
+      int numReservedMapSlotsForThisJob = 
+        (mapScheduler.getNumReservedTaskTrackers(j) * 
+         mapScheduler.getSlotsPerTask(j)); 
+      int numReservedReduceSlotsForThisJob = 
+        (reduceScheduler.getNumReservedTaskTrackers(j) * 
+         reduceScheduler.getSlotsPerTask(j)); 
+      
+      j.setSchedulingInfo(
+          CapacityTaskScheduler.getJobQueueSchedInfo(numMapsRunningForThisJob, 
+              numRunningMapSlots,
+              numReservedMapSlotsForThisJob,
+              numReducesRunningForThisJob, 
+              numRunningReduceSlots,
+              numReservedReduceSlotsForThisJob));
+
+      update(TaskType.MAP, j, j.getProfile().getUser(), 
+          numMapsRunningForThisJob, numMapSlotsForThisJob);
+      update(TaskType.REDUCE, j, j.getProfile().getUser(), 
+          numReducesRunningForThisJob, numReduceSlotsForThisJob);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format(queueName + " - updateQSI: job %s: run(m)=%d, "
+            + "occupied(m)=%d, run(r)=%d, occupied(r)=%d, finished(m)=%d,"
+            + " finished(r)=%d, failed(m)=%d, failed(r)=%d, "
+            + "spec(m)=%d, spec(r)=%d, total(m)=%d, total(r)=%d", j
+            .getJobID().toString(), Integer
+            .valueOf(numMapsRunningForThisJob), Integer
+            .valueOf(numMapSlotsForThisJob), Integer
+            .valueOf(numReducesRunningForThisJob), Integer
+            .valueOf(numReduceSlotsForThisJob), Integer.valueOf(j
+            .finishedMaps()), Integer.valueOf(j.finishedReduces()), Integer
+            .valueOf(j.failedMapTasks),
+            Integer.valueOf(j.failedReduceTasks), Integer
+                .valueOf(j.speculativeMapTasks), Integer
+                .valueOf(j.speculativeReduceTasks), Integer
+                .valueOf(j.numMapTasks), Integer.valueOf(j.numReduceTasks)));
+      }
+    }
+  }
+  
+  boolean doesQueueSupportPriorities() {
+    return supportsPriorities;
+  }
+
+  /**
+   * return information about the queue
+   *
+   * @return a String representing the information about the queue
+   */
+  @Override
+  public String toString(){
+    // We print out the queue information first, followed by info
+    // on map and reduce tasks and job info
+    StringBuilder sb = new StringBuilder();
+    sb.append("Queue configuration\n");
+    sb.append("Capacity Percentage: ");
+    sb.append(capacityPercent);
+    sb.append("%\n");
+    sb.append("User Limit: " + ulMin + "%\n");
+    sb.append("Priority Supported: " +
+        (doesQueueSupportPriorities() ? "YES":"NO") + "\n");
+    sb.append("-------------\n");
+
+    sb.append("Map tasks\n");
+    sb.append(mapSlots.toString());
+    sb.append("-------------\n");
+    sb.append("Reduce tasks\n");
+    sb.append(reduceSlots.toString());
+    sb.append("-------------\n");
+    
+    sb.append("Job info\n");
+    sb.append("Number of Waiting Jobs: " + getNumWaitingJobs() + "\n");
+    sb.append("Number of Initializing Jobs: " + getNumInitializingJobs() + "\n");
+    sb.append("Number of users who have submitted jobs: " + 
+        numJobsByUser.size() + "\n");
+    return sb.toString();
+  }
+  
+  /**
+   * Functionality to deal with job initialization
+   */
+
+  
+  // per-user information
+  static class UserInfo {
+    
+    Map<JobSchedulingInfo, JobInProgress> waitingJobs; // for waiting jobs
+    Map<JobSchedulingInfo, JobInProgress> initializingJobs; // for init'ing jobs
+    Map<JobSchedulingInfo, JobInProgress> runningJobs; // for running jobs
+    
+    int activeTasks;
+    
+    public UserInfo(Comparator<JobSchedulingInfo> comparator) {
+      waitingJobs = new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+      initializingJobs = new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+      runningJobs = new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
+    }
+    
+    int getNumInitializingJobs() {
+      return initializingJobs.size();
+    }
+    
+    int getNumRunningJobs() {
+      return runningJobs.size();
+    }
+    
+    int getNumWaitingJobs() {
+      return waitingJobs.size();
+    }
+    
+    int getNumActiveTasks() {
+      return activeTasks;
+    }
+    
+    public void jobAdded(JobSchedulingInfo jobSchedInfo, JobInProgress job) {
+      waitingJobs.put(jobSchedInfo, job); 
+    }
+    
+    public void removeWaitingJob(JobSchedulingInfo jobSchedInfo) {
+      waitingJobs.remove(jobSchedInfo);
+    }
+    
+    public void jobInitializing(JobSchedulingInfo jobSchedInfo, 
+        JobInProgress job) {
+      if (!initializingJobs.containsKey(jobSchedInfo)) {
+        initializingJobs.put(jobSchedInfo, job);
+        activeTasks += job.desiredTasks();
+      }
+    }
+    
+    public void removeInitializingJob(JobSchedulingInfo jobSchedInfo) {
+      initializingJobs.remove(jobSchedInfo);
+    }
+    
+    public void jobInitialized(JobSchedulingInfo jobSchedInfo, 
+        JobInProgress job) {
+      runningJobs.put(jobSchedInfo, job);
+    }
+    
+    public void jobCompleted(JobSchedulingInfo jobSchedInfo, 
+        JobInProgress job) {
+      // It is *ok* to remove from runningJobs even if the job was never RUNNING
+      runningJobs.remove(jobSchedInfo);
+      activeTasks -= job.desiredTasks();
+    }
+    
+    boolean isInactive() {
+      return activeTasks == 0 && runningJobs.size() == 0  && 
+      waitingJobs.size() == 0 && initializingJobs.size() == 0;
+    }
+  }
+
+  synchronized Collection<JobInProgress> getWaitingJobs() {
+    return Collections.unmodifiableCollection(
+        new LinkedList<JobInProgress>(waitingJobs.values()));
+  }
+  
+  synchronized Collection<JobInProgress> getInitializingJobs() {
+    return Collections.unmodifiableCollection(
+        new LinkedList<JobInProgress>(initializingJobs.values()));
+  }
+  
+  synchronized Collection<JobInProgress> getRunningJobs() {
+    return Collections.unmodifiableCollection(
+        new LinkedList<JobInProgress>(runningJobs.values())); 
+  }
+  
+  synchronized int getNumActiveTasks() {
+    return activeTasks;
+  }
+  
+  synchronized int getNumRunningJobs() {
+    return runningJobs.size();
+  }
+  
+  synchronized int getNumInitializingJobs() {
+    return initializingJobs.size();
+  }
+  
+  synchronized int getNumInitializingJobsByUser(String user) {
+    UserInfo userInfo = users.get(user);
+    return (userInfo == null) ? 0 : userInfo.getNumInitializingJobs();
+  }
+  
+  synchronized int getNumRunningJobsByUser(String user) {
+    UserInfo userInfo = users.get(user);
+    return (userInfo == null) ? 0 : userInfo.getNumRunningJobs();
+  }
+
+  synchronized int getNumActiveTasksByUser(String user) {
+    UserInfo userInfo = users.get(user);
+    return (userInfo == null) ? 0 : userInfo.getNumActiveTasks();
+  }
+
+  synchronized int getNumWaitingJobsByUser(String user) {
+    UserInfo userInfo = users.get(user);
+    return (userInfo == null) ? 0 : userInfo.getNumWaitingJobs();
+  }
+
+  synchronized void addInitializingJob(JobInProgress job) {
+    JobSchedulingInfo jobSchedInfo = new JobSchedulingInfo(job);
+
+    if (!waitingJobs.containsKey(jobSchedInfo)) {
+      // Ideally this should have been an *assert*, but it can't be done
+      // since we make copies in getWaitingJobs which is used in 
+      // JobInitPoller.getJobsToInitialize
+      LOG.warn("Cannot find job " + job.getJobID() + 
+          " in list of waiting jobs!");
+      return;
+    }
+    
+    if (initializingJobs.containsKey(jobSchedInfo)) {
+      LOG.warn("job " + job.getJobID() + " already being init'ed in queue '" +
+          queueName + "'!");
+      return;
+    }
+
+    // Mark the job as initializing
+    initializingJobs.put(jobSchedInfo, job);
+
+    addJob(jobSchedInfo, job);
+    
+    if (LOG.isDebugEnabled()) {
+      String user = job.getProfile().getUser();
+      LOG.debug("addInitializingJob:" +
+          " job=" + job.getJobID() +
+          " user=" + user + 
+          " queue=" + queueName +
+          " qWaitJobs=" +  getNumWaitingJobs() +
+          " qInitJobs=" +  getNumInitializingJobs()+
+          " qRunJobs=" +  getNumRunningJobs() +
+          " qActiveTasks=" +  getNumActiveTasks() +
+          " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+          " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+          " uRunJobs=" +  getNumRunningJobsByUser(user) +
+          " uActiveTasks=" +  getNumActiveTasksByUser(user)
+      );
+    }
+
+    // Remove the job from 'waiting' jobs list
+    removeWaitingJob(jobSchedInfo, JobStatus.PREP);
+  }
+  
+  synchronized JobInProgress removeInitializingJob(
+      JobSchedulingInfo jobSchedInfo, int runState) {
+    JobInProgress job = initializingJobs.remove(jobSchedInfo);
+    
+    if (job != null) {
+      String user = job.getProfile().getUser();
+      UserInfo userInfo = users.get(user);
+      userInfo.removeInitializingJob(jobSchedInfo);
+      
+      // Decrement counts if the job is killed _while_ it was selected for
+      // initialization, but aborted
+      // NOTE: addRunningJob calls removeInitializingJob with runState==RUNNING
+      if (runState != JobStatus.RUNNING) {
+        finishJob(jobSchedInfo, job);
+      }
+      
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("removeInitializingJob:" +
+            " job=" + job.getJobID() +
+            " user=" + user + 
+            " queue=" + queueName +
+            " qWaitJobs=" +  getNumWaitingJobs() +
+            " qInitJobs=" +  getNumInitializingJobs()+
+            " qRunJobs=" +  getNumRunningJobs() +
+            " qActiveTasks=" +  getNumActiveTasks() +
+            " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+            " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+            " uRunJobs=" +  getNumRunningJobsByUser(user) +
+            " uActiveTasks=" +  getNumActiveTasksByUser(user)
+        );
+      }
+    }
+    
+    return job;
+  }
+  
+  synchronized void addRunningJob(JobInProgress job) {
+    JobSchedulingInfo jobSchedInfo = new JobSchedulingInfo(job);
+
+    if (runningJobs.containsKey(jobSchedInfo)) {
+      LOG.info("job " + job.getJobID() + " already running in queue '" +
+          queueName + "'!");
+      return;
+    }
+
+    // Mark the job as running
+    runningJobs.put(jobSchedInfo,job);
+
+    // Update user stats
+    String user = job.getProfile().getUser();
+    UserInfo userInfo = users.get(user);
+    userInfo.jobInitialized(jobSchedInfo, job);
+    
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("addRunningJob:" +
+          " job=" + job.getJobID() +
+          " user=" + user + 
+          " queue=" + queueName +
+          " qWaitJobs=" +  getNumWaitingJobs() +
+          " qInitJobs=" +  getNumInitializingJobs()+
+          " qRunJobs=" +  getNumRunningJobs() +
+          " qActiveTasks=" +  getNumActiveTasks() +
+          " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+          " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+          " uRunJobs=" +  getNumRunningJobsByUser(user) +
+          " uActiveTasks=" +  getNumActiveTasksByUser(user)
+      );
+    }
+
+    // Remove from 'initializing' list
+    // Note that at this point job.status.state != RUNNING, 
+    // however, logically it is a reasonable state to pass in to ensure
+    // that removeInitializingJob doesn't double-decrement  
+    // the relevant queue/user counters
+    removeInitializingJob(jobSchedInfo, JobStatus.RUNNING);
+  }
+
+  synchronized private void addJob(JobSchedulingInfo jobSchedInfo,
+      JobInProgress job) {
+    // Update queue stats
+    activeTasks += job.desiredTasks();
+    
+    // Update user stats
+    String user = job.getProfile().getUser();
+    UserInfo userInfo = users.get(user);
+    userInfo.jobInitializing(jobSchedInfo, job);
+  }
+  
+  synchronized private void finishJob(JobSchedulingInfo jobSchedInfo,
+      JobInProgress job) {
+    // Update user stats
+    String user = job.getProfile().getUser();
+    UserInfo userInfo = users.get(user);
+    userInfo.jobCompleted(jobSchedInfo, job);
+    
+    if (userInfo.isInactive()) {
+      users.remove(user);
+    }
+
+    // Update queue stats
+    activeTasks -= job.desiredTasks();
+  }
+  
+  synchronized JobInProgress removeRunningJob(JobSchedulingInfo jobSchedInfo, 
+      int runState) {
+    JobInProgress job = runningJobs.remove(jobSchedInfo); 
+
+    // Be careful: we might be trying to remove a job
+    // that was never initialized
+    if (job != null) {
+      String user = job.getProfile().getUser();
+      finishJob(jobSchedInfo, job);
+      
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("removeRunningJob:" +
+            " job=" + job.getJobID() +
+            " user=" + user + 
+            " queue=" + queueName +
+            " qWaitJobs=" +  getNumWaitingJobs() +
+            " qInitJobs=" +  getNumInitializingJobs()+
+            " qRunJobs=" +  getNumRunningJobs() +
+            " qActiveTasks=" +  getNumActiveTasks() +
+            " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+            " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+            " uRunJobs=" +  getNumRunningJobsByUser(user) +
+            " uActiveTasks=" +  getNumActiveTasksByUser(user)
+        );
+      }
+    }
+
+    return job;
+  }
+  
+  synchronized void addWaitingJob(JobInProgress job) throws IOException {
+    JobSchedulingInfo jobSchedInfo = new JobSchedulingInfo(job);
+    if (waitingJobs.containsKey(jobSchedInfo)) {
+      LOG.info("job " + job.getJobID() + " already waiting in queue '" + 
+          queueName + "'!");
+      return;
+    }
+    
+    String user = job.getProfile().getUser();
+
+    // Check acceptance limits
+    checkJobSubmissionLimits(job, user);
+    
+    waitingJobs.put(jobSchedInfo, job);
+    
+    // Update user stats
+    UserInfo userInfo = users.get(user);
+    if (userInfo == null) {
+      userInfo = new UserInfo(comparator);
+      users.put(user, userInfo);
+    }
+    userInfo.jobAdded(jobSchedInfo, job);
+    
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("addWaitingJob:" +
+          " job=" + job.getJobID() +
+          " user=" + user + 
+          " queue=" + queueName +
+          " qWaitJobs=" +  getNumWaitingJobs() +
+          " qInitJobs=" +  getNumInitializingJobs()+
+          " qRunJobs=" +  getNumRunningJobs() +
+          " qActiveTasks=" +  getNumActiveTasks() +
+          " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+          " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+          " uRunJobs=" +  getNumRunningJobsByUser(user) +
+          " uActiveTasks=" +  getNumActiveTasksByUser(user)
+      );
+    }
+  }
+  
+  synchronized JobInProgress removeWaitingJob(JobSchedulingInfo jobSchedInfo, 
+      int unused) {
+    JobInProgress job = waitingJobs.remove(jobSchedInfo);
+    if (job != null) {
+      String user = job.getProfile().getUser();
+      UserInfo userInfo = users.get(user);
+      userInfo.removeWaitingJob(jobSchedInfo);
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("removeWaitingJob:" +
+            " job=" + job.getJobID() +
+            " user=" + user + 
+            " queue=" + queueName +
+            " qWaitJobs=" +  getNumWaitingJobs() +
+            " qInitJobs=" +  getNumInitializingJobs()+
+            " qRunJobs=" +  getNumRunningJobs() +
+            " qActiveTasks=" +  getNumActiveTasks() +
+            " uWaitJobs=" +  getNumWaitingJobsByUser(user) +
+            " uInitJobs=" +  getNumInitializingJobsByUser(user) +
+            " uRunJobs=" +  getNumRunningJobsByUser(user) +
+            " uActiveTasks=" +  getNumActiveTasksByUser(user)
+        );
+      }
+    }
+    
+    return job;
+  }
+
+  synchronized int getNumActiveUsers() {
+    return users.size();
+  }
+  
+  synchronized int getNumWaitingJobs() {
+    return waitingJobs.size(); 
+  } 
+  
+  Comparator<JobSchedulingInfo> getComparator() {
+    return comparator;
+  }
+  
+  /**
+   * Functions to deal with queue-limits.
+   */
+  
+  /**
+   * Check if the queue can be assigned <code>numSlots</code> 
+   * of the given <code>taskType</code> so that the queue doesn't exceed its
+   * configured maximum-capacity.
+   * 
+   * @param taskType
+   * @param numSlots
+   * @return <code>true</code> if slots can be assigned
+   */
+  boolean assignSlotsToQueue(TaskType taskType, int numSlots) {
+    // Check if the queue is running over its maximum-capacity
+    if (getMaxCapacity(taskType) > 0) {  // Check if max capacity is enabled
+      if ((getNumSlotsOccupied(taskType) + numSlots) > 
+          getMaxCapacity(taskType)) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Queue " + queueName + " has reached its max " + 
+              taskType + " capacity");
+          LOG.debug("Queue capacity: " + getCapacity(taskType));
+        }
+        return false;
+      }
+    }
+    
+    return true;
+  }
+  /**
+   * Check if the given <code>job</code>, <code>user</code> and 
+   * queue can be assigned the requested number of slots of 
+   * the given <code>taskType</code>.
+   * 
+   * This checks to ensure that queue and user are under appropriate limits.
+   * 
+   * @param taskType
+   * @param job
+   * @param user
+   * @return <code>true</code> if the given job/user/queue can be assigned 
+   * the requested number of slots, <code>false</code> otherwise
+   */
+  boolean assignSlotsToJob(TaskType taskType, JobInProgress job, String user) {
+    int numSlotsRequested = job.getNumSlotsPerTask(taskType);
+    
+    // Check to ensure we will not go over the queue's max-capacity
+    if (!assignSlotsToQueue(taskType, numSlotsRequested)) {
+      return false;
+    }
+    
+    // What is our current capacity? 
+    // * It is equal to max(numSlotsRequested, queue-capacity) if
+    //   we're running below capacity. The 'max' ensures that jobs in queues
+    //   with minuscule capacity (< 1 slot) make progress.
+    // * If we're running over capacity, then it is
+    //   #running plus slotsPerTask of the job (which is the number of extra
+    //   slots we're getting).
+    
+    // Allow progress for queues with minuscule capacity
+    int queueCapacity = Math.max(getCapacity(taskType), numSlotsRequested);
+    
+    int queueSlotsOccupied = getNumSlotsOccupied(taskType);
+    int currentCapacity;
+    if (queueSlotsOccupied < queueCapacity) {
+      currentCapacity = queueCapacity;
+    }
+    else {
+      currentCapacity = queueSlotsOccupied + numSlotsRequested;
+    }
+    
+    // Never allow a single user to take more than the 
+    // queue's configured capacity * user-limit-factor.
+    // Also, the queue's configured capacity should be higher than 
+    // queue-hard-limit * ulMin
+    
+    // Not every user in this queue necessarily needs slots of type 'taskType'
+    int activeUsers = Math.max(1, getNumActiveUsersByTaskType(taskType));  
+    
+    int limit = 
+      Math.min(
+          Math.max(divideAndCeil(currentCapacity, activeUsers), 
+                   divideAndCeil(ulMin*currentCapacity, 100)),
+          (int)(queueCapacity * ulMinFactor)
+          );
+
+    if ((getNumSlotsOccupiedByUser(user, taskType) + numSlotsRequested) > 
+        limit) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("User " + user + " is over limit for queue=" + queueName + 
+            " queueCapacity=" + queueCapacity +
+            " num slots occupied=" + getNumSlotsOccupiedByUser(user, taskType) + 
+            " limit=" + limit +" numSlotsRequested=" + numSlotsRequested + 
+            " currentCapacity=" + currentCapacity + 
+            " numActiveUsers=" + getNumActiveUsersByTaskType(taskType));
+      }
+      return false;
+    }
+
+    return true;
+  }
+  
+  /**
+   * Ceil of result of dividing two integers.
+   * 
+   * This is *not* a utility method. 
+   * Neither <code>a</code> nor <code>b</code> should be negative.
+   *  
+   * @param a
+   * @param b
+   * @return ceil of the result of a/b
+   */
+  private static int divideAndCeil(int a, int b) {
+    if (b == 0) {
+      LOG.info("divideAndCeil called with a=" + a + " b=" + b);
+      return 0;
+    }
+    return (a + (b - 1)) / b;
+  }
+
+  /**
+   * Check if the given <code>job</code> can be accepted to the 
+   * queue on behalf of the <code>user</code>.
+   * @param job 
+   * @param user
+   * @throws IOException if accepting the job would exceed the queue's or
+   *         the user's job-acceptance limits
+   */
+  synchronized void checkJobSubmissionLimits(JobInProgress job, String user) 
+  throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("checkJobSubmissionLimits - " +
+          "qWaitJobs=" + getNumWaitingJobs() + " " +
+          "qInitJobs=" + getNumInitializingJobs() + " " +
+          "qRunJobs=" + getNumRunningJobs() + " " +
+          "maxJobsToAccept=" + maxJobsToAccept +
+          "user=" + user + " " +
+          "uWaitJobs=" +  getNumWaitingJobsByUser(user) + " " +
+          "uRunJobs=" + getNumRunningJobsByUser(user)  + " " +
+          "maxJobsPerUserToAccept=" + maxJobsPerUserToAccept + " " +
+          "");
+    }
+    
+    // Task limits - No point accepting the job if it can never be initialized
+    if (job.desiredTasks() > maxActiveTasksPerUser) {
+      throw new IOException(
+          "Job '" + job.getJobID() + "' from user '" + user  +
+          "' rejected since it has " + job.desiredTasks() + " tasks which" +
+          " exceeds the limit of " + maxActiveTasksPerUser + 
+          " tasks per-user which can be initialized for queue '" + 
+          queueName + "'"
+          );
+    }
+    
+    // Across all jobs in queue
+    int queueWaitingJobs = getNumWaitingJobs();
+    int queueInitializingJobs = getNumInitializingJobs();
+    int queueRunningJobs = getNumRunningJobs();
+    if ((queueWaitingJobs + queueInitializingJobs + queueRunningJobs) >= 
+      maxJobsToAccept) {
+      throw new IOException(
+          "Job '" + job.getJobID() + "' from user '" + user  + 
+          "' rejected since queue '" + queueName + 
+          "' already has " + queueWaitingJobs + " waiting jobs, " + 
+          queueInitializingJobs + " initializing jobs and " + 
+          queueRunningJobs + " running jobs - Exceeds limit of " +
+          maxJobsToAccept + " jobs to accept");
+    }
+    
+    // Across all jobs of the user
+    int userWaitingJobs = getNumWaitingJobsByUser(user);
+    int userInitializingJobs = getNumInitializingJobsByUser(user);
+    int userRunningJobs = getNumRunningJobsByUser(user);
+    if ((userWaitingJobs + userInitializingJobs + userRunningJobs) >= 
+        maxJobsPerUserToAccept) {
+      throw new IOException(
+          "Job '" + job.getJobID() + "' rejected since user '" + user +  
+          "' already has " + userWaitingJobs + " waiting jobs, " +
+          userInitializingJobs + " initializing jobs and " +
+          userRunningJobs + " running jobs - " +
+          " Exceeds limit of " + maxJobsPerUserToAccept + " jobs to accept" +
+          " in queue '" + queueName + "' per user");
+    }
+  }
+  
+  /**
+   * Check if the <code>job</code> can be initialized in the queue.
+   * 
+   * @param job
+   * @return <code>true</code> if the job can be initialized, 
+   *         <code>false</code> otherwise
+   */
+  synchronized boolean initializeJobForQueue(JobInProgress job) {
+    
+    // Check if the queue already has too many running/initializing jobs
+    int runningJobs = getNumRunningJobs();
+    int initializingJobs = getNumInitializingJobs();
+    if ((runningJobs + initializingJobs) >= maxJobsToInit) {
+      LOG.info(getQueueName() + " already has " + runningJobs + 
+          " running jobs and " + initializingJobs + " initializing jobs;" +
+          " cannot initialize " + job.getJobID() + 
+          " since it will exceeed limit of " + maxJobsToInit + 
+          " initialized jobs for this queue");
+      return false;
+    }
+    
+    // Check if queue has too many active tasks
+    if ((activeTasks + job.desiredTasks()) > maxActiveTasks) {
+      LOG.info("Queue '" + getQueueName() + "' has " + activeTasks + 
+          " active tasks, cannot initialize job '" + job.getJobID() + 
+          "' for user '" + job.getProfile().getUser() + "' with " +
+          job.desiredTasks() + " tasks since it will exceed limit of " + 
+          maxActiveTasks + " active tasks for this queue");
+      return false;
+    }
+    
+    return true;
+  }
+  
+  /**
+   * Check if the <code>job</code> can be initialized in the queue
+   * on behalf of the <code>user</code>.
+   * 
+   * @param job
+   * @return <code>true</code> if the job can be initialized, 
+   *         <code>false</code> otherwise
+   */
+  synchronized boolean initializeJobForUser(JobInProgress job) {
+    
+    String user = job.getProfile().getUser();
+    
+    // Check if the user has too many jobs
+    int userRunningJobs = getNumRunningJobsByUser(user);
+    int userInitializingJobs = getNumInitializingJobsByUser(user);
+    if ((userRunningJobs + userInitializingJobs) >= maxJobsPerUserToInit) {
+      LOG.info(getQueueName() + " already has " + userRunningJobs + 
+          " running jobs and " + userInitializingJobs + " initializing jobs" +
+          " for user " + user + "; cannot initialize " + job.getJobID() + 
+          " since it will exceeed limit of " + 
+          maxJobsPerUserToInit + " initialized jobs per user for this queue");
+      return false;
+    }
+    
+    // Check if the user has too many active tasks
+    int userActiveTasks = getNumActiveTasksByUser(user);
+    if ((userActiveTasks + job.desiredTasks()) > maxActiveTasksPerUser) {
+      LOG.info(getQueueName() + " has " + userActiveTasks + 
+          " active tasks for user " + user + 
+          ", cannot initialize " + job.getJobID() + " with " +
+          job.desiredTasks() + " tasks since it will exceed limit of " + 
+          maxActiveTasksPerUser + " active tasks per user for this queue");
+      return false;
+    }
+    
+    return true;
+  }
+
+}
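
For illustration, the per-user limit computed in assignSlotsToJob() above boils down to the standalone sketch below. It assumes ulMin is the queue's minimum-user-limit-percent and ulMinFactor its user-limit factor, as used in the patch; the class, method names and sample numbers are hypothetical and only show the arithmetic.

// Standalone sketch of the user-limit arithmetic in assignSlotsToJob().
// Not scheduler code; all names and numbers here are illustrative.
public class UserLimitSketch {
  // Ceiling of a/b for non-negative a and positive b, like divideAndCeil().
  static int divideAndCeil(int a, int b) {
    return (a + (b - 1)) / b;
  }

  static int computeLimit(int queueCapacity, int slotsOccupied,
                          int numSlotsRequested, int activeUsers,
                          int ulMin, float ulMinFactor) {
    // Let queues with minuscule capacity (< 1 slot) still make progress.
    int capacity = Math.max(queueCapacity, numSlotsRequested);
    // Below capacity the base is the queue capacity; over capacity it is
    // what is already occupied plus this request.
    int currentCapacity = (slotsOccupied < capacity)
        ? capacity
        : slotsOccupied + numSlotsRequested;
    // Even share per active user, but at least ulMin percent of the current
    // capacity, and never more than capacity * ulMinFactor.
    return Math.min(
        Math.max(divideAndCeil(currentCapacity, Math.max(1, activeUsers)),
                 divideAndCeil(ulMin * currentCapacity, 100)),
        (int) (capacity * ulMinFactor));
  }

  public static void main(String[] args) {
    // 100-slot queue, 120 slots already occupied, 2 slots per task,
    // 3 active users, ulMin = 25, ulMinFactor = 2.0  ->  prints 41
    System.out.println(computeLimit(100, 120, 2, 3, 25, 2.0f));
  }
}

A user whose currently occupied slots plus the new request would exceed this limit is skipped, which is exactly the check at the end of assignSlotsToJob().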

+ 154 - 0
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacitySchedulerServlet.java

@@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Collections;
+import java.util.HashMap;
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapred.JobHistory.JobInfo;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Servlet for displaying capacity scheduler information, installed at
+ * [job tracker URL]/scheduler when the {@link CapacityTaskScheduler} is in use.
+ * 
+ * It renders one row per queue showing the running, initializing and pending
+ * job counts, the configured capacity percentage, and the map and reduce slot
+ * capacity and usage.
+ */
+public class CapacitySchedulerServlet extends HttpServlet {
+  private static final long serialVersionUID = 9104070533067306659L;
+
+  private transient CapacityTaskScheduler scheduler;
+  private transient JobTracker jobTracker;
+
+  @Override
+  public void init() throws ServletException {
+    super.init();
+    ServletContext servletContext = this.getServletContext();
+    this.scheduler = (CapacityTaskScheduler) servletContext
+        .getAttribute("scheduler");
+    this.jobTracker = (JobTracker) scheduler.taskTrackerManager;
+  }
+
+  @Override
+  protected void doPost(HttpServletRequest req, HttpServletResponse resp)
+      throws ServletException, IOException {
+    doGet(req, resp); // Same handler for both GET and POST
+  }
+
+  @Override
+  public void doGet(HttpServletRequest request, HttpServletResponse response)
+      throws ServletException, IOException {
+    // Print out the normal response
+    response.setContentType("text/html");
+
+    // Because the client may read arbitrarily slowly, and we hold locks while
+    // producing the servlet output, we write to our own buffer, which we know
+    // won't block.
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter out = new PrintWriter(baos);
+    String hostname = StringUtils.simpleHostname(jobTracker
+        .getJobTrackerMachine());
+    out.print("<html><head>");
+    out.printf("<title>%s Job Scheduler Admininstration</title>\n", hostname);
+    out.print("<link rel=\"stylesheet\" type=\"text/css\" "
+        + "href=\"/static/hadoop.css\">\n");
+    out.print("<script type=\"text/javascript\" "
+        + "src=\"/static/sorttable.js\"></script> \n");
+    out.print("</head><body>\n");
+    out.printf("<h1><a href=\"/jobtracker.jsp\">%s</a> "
+        + "Job Scheduler Administration</h1>\n", hostname);
+    showQueues(out);
+    out.print("</body></html>\n");
+    out.close();
+
+    // Flush our buffer to the real servlet output
+    OutputStream servletOut = response.getOutputStream();
+    baos.writeTo(servletOut);
+    servletOut.close();
+  }
+
+  /**
+   * Print a view of the queues to the given output writer.
+   */
+
+  private void showQueues(PrintWriter out) 
+      throws IOException {
+    synchronized(scheduler) {
+      out.print("<h2>Queues</h2>\n");
+      out.print("<table border=\"2\" cellpadding=\"5\" " + 
+                " cellspacing=\"2\" class=\"sortable\"> \n");
+      out.print("<tr><th>Queue</th>" +
+      		      "<th>Running Jobs</th>" + 
+                "<th>Pending Jobs</th>" + 
+      		      "<th>Capacity Percentage</th>" +
+      		      "<th>Map Task Capacity</th>" +
+      		      "<th>Map Task Used Capacity</th>" +
+      		      "<th>Running Maps</th>" +
+      		      "<th>Reduce Task Capacity</th>" + 
+                "<th>Reduce Task Used Capacity</th>" +
+                "<th>Running Reduces </tr>\n");
+      for (CapacitySchedulerQueue queue : scheduler.getQueueInfoMap().values()) {
+        String queueName = queue.getQueueName();
+        out.print("<tr>\n");
+        out.printf(
+            "<td><a href=\"jobqueue_details.jsp?queueName=%s\">%s</a></td>\n",
+            queueName, queueName);
+        out.printf("<td>%s</td>\n", 
+            (queue.getNumRunningJobs() + queue.getNumInitializingJobs()));
+        out.printf("<td>%s</td>\n", queue.getNumWaitingJobs());
+        out.printf("<td>%.1f%%</td>\n", queue.getCapacityPercent());
+        int mapCapacity = queue.getCapacity(TaskType.MAP);
+        int mapSlotsOccupied = queue.getNumSlotsOccupied(TaskType.MAP);
+        int reduceSlotsOccupied = queue.getNumSlotsOccupied(TaskType.REDUCE);
+        float occupiedSlotsAsPercent = 
+            mapCapacity != 0 ? ((float) mapSlotsOccupied * 100 / mapCapacity)
+            : 0;
+        out.printf("<td>%s</td>\n", mapCapacity);
+        out.printf("<td>%s (%.1f%% of Capacity)</td>\n", mapSlotsOccupied,
+            occupiedSlotsAsPercent);
+        out.printf("<td>%s</td>\n", queue.getNumRunningTasks(TaskType.MAP));
+        int reduceCapacity = queue.getCapacity(TaskType.REDUCE);
+        float redOccupiedSlotsAsPercent = 
+          (reduceCapacity != 0 ? ((float) reduceSlotsOccupied * 100 / reduceCapacity)
+            : 0);
+        out.printf("<td>%s</td>\n", reduceCapacity);
+        out.printf("<td>%s (%.1f%% of Capacity)</td>\n", reduceSlotsOccupied,
+            redOccupiedSlotsAsPercent);
+        out.printf("<td>%s</td>\n", queue.getNumRunningTasks(TaskType.REDUCE));
+      }
+      out.print("</table>\n");
+    }
+  }
+}
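
The servlet is read-only, so a quick way to inspect it is to fetch [job tracker URL]/scheduler over HTTP. The snippet below is a minimal, hypothetical client; the host and port are assumptions (localhost and the 0.20 JobTracker web UI default), and it simply dumps the HTML table that showQueues() renders.

// Minimal client sketch: dump the page produced by CapacitySchedulerServlet.
// Host and port are assumptions, not taken from this patch.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class DumpSchedulerPage {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:50030/scheduler");
    BufferedReader in = new BufferedReader(
        new InputStreamReader(url.openConnection().getInputStream()));
    for (String line; (line = in.readLine()) != null; ) {
      System.out.println(line);   // one <tr> per configured queue
    }
    in.close();
  }
}

TestCapacitySchedulerServlet, further below in this patch, does the same thing programmatically against a MiniMRCluster.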

File diff content is too large to display
+ 380 - 402
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/CapacityTaskScheduler.java


+ 83 - 105
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/JobInitializationPoller.java

@@ -19,11 +19,15 @@ package org.apache.hadoop.mapred;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.logging.Log;
@@ -68,16 +72,6 @@ public class JobInitializationPoller extends Thread {
   private static final Log LOG = LogFactory
       .getLog(JobInitializationPoller.class.getName());
 
-  /*
-   * The poller picks up jobs across users to initialize based on user limits.
-   * Suppose the user limit for a queue is 25%, it means atmost 4 users' jobs
-   * can run together. However, in order to account for jobs from a user that
-   * might complete faster than others, it initializes jobs from an additional
-   * number of users as a backlog. This variable defines the additional
-   * number of users whose jobs can be considered for initializing. 
-   */
-  private static final int MAX_ADDITIONAL_USERS_TO_INIT = 2;
-
   private JobQueuesManager jobQueueManager;
   private long sleepInterval;
   private int poolSize;
@@ -100,11 +94,12 @@ public class JobInitializationPoller extends Thread {
      * The hash map which maintains relationship between queue to jobs to
      * initialize per queue.
      */
-    private HashMap<String, TreeMap<JobSchedulingInfo, JobInProgress>> jobsPerQueue;
+    private Map<String, Map<JobSchedulingInfo, JobInProgress>> jobsPerQueue;
 
     public JobInitializationThread() {
       startIniting = true;
-      jobsPerQueue = new HashMap<String, TreeMap<JobSchedulingInfo, JobInProgress>>();
+      jobsPerQueue = 
+        new ConcurrentHashMap<String, Map<JobSchedulingInfo, JobInProgress>>();
     }
 
     @Override
@@ -156,8 +151,7 @@ public class JobInitializationPoller extends Thread {
      * @return First job in the queue and removes it.
      */
     private JobInProgress getFirstJobInQueue(String queue) {
-      TreeMap<JobSchedulingInfo, JobInProgress> jobsList = jobsPerQueue
-          .get(queue);
+      Map<JobSchedulingInfo, JobInProgress> jobsList = jobsPerQueue.get(queue);
       synchronized (jobsList) {
         if (jobsList.isEmpty()) {
           return null;
@@ -186,8 +180,7 @@ public class JobInitializationPoller extends Thread {
     }
 
     void addJobsToQueue(String queue, JobInProgress job) {
-      TreeMap<JobSchedulingInfo, JobInProgress> jobs = jobsPerQueue
-          .get(queue);
+      Map<JobSchedulingInfo, JobInProgress> jobs = jobsPerQueue.get(queue);
       if (jobs == null) {
         LOG.error("Invalid queue passed to the thread : " + queue
             + " For job :: " + job.getJobID());
@@ -199,43 +192,20 @@ public class JobInitializationPoller extends Thread {
       }
     }
 
-    void addQueue(String queue) {
-      TreeMap<JobSchedulingInfo, JobInProgress> jobs = new TreeMap<JobSchedulingInfo, JobInProgress>(
-          jobQueueManager.getComparator(queue));
-      jobsPerQueue.put(queue, jobs);
-    }
-  }
+    void addQueue(String queueName) {
+      CapacitySchedulerQueue queue = jobQueueManager.getQueue(queueName);
 
-  /**
-   * The queue information class maintains following information per queue:
-   * Maximum users allowed to initialize job in the particular queue. Maximum
-   * jobs allowed to be initialize per user in the queue.
-   * 
-   */
-  private class QueueInfo {
-    String queue;
-    int maxUsersAllowedToInitialize;
-    int maxJobsPerUserToInitialize;
-
-    public QueueInfo(String queue, int maxUsersAllowedToInitialize,
-        int maxJobsPerUserToInitialize) {
-      this.queue = queue;
-      this.maxJobsPerUserToInitialize = maxJobsPerUserToInitialize;
-      this.maxUsersAllowedToInitialize = maxUsersAllowedToInitialize;
+      TreeMap<JobSchedulingInfo, JobInProgress> jobs = 
+        new TreeMap<JobSchedulingInfo, JobInProgress>(queue.getComparator());
+      jobsPerQueue.put(queueName, jobs);
     }
   }
 
-  /**
-   * Map which contains the configuration used for initializing jobs
-   * in that associated to a particular job queue.
-   */
-  private HashMap<String, QueueInfo> jobQueues;
-
   /**
    * Set of jobs which have been passed to Initialization threads.
    * This is maintained so that we dont call initTasks() for same job twice.
    */
-  private HashMap<JobID,JobInProgress> initializedJobs;
+  private HashMap<JobID, JobInProgress> initializedJobs;
 
   private volatile boolean running;
 
@@ -244,40 +214,34 @@ public class JobInitializationPoller extends Thread {
    * The map which provides information which thread should be used to
    * initialize jobs for a given job queue.
    */
-  private HashMap<String, JobInitializationThread> threadsToQueueMap;
+  private Map<String, JobInitializationThread> threadsToQueueMap;
 
   public JobInitializationPoller(JobQueuesManager mgr,
       CapacitySchedulerConf rmConf, Set<String> queue, 
       TaskTrackerManager ttm) {
     initializedJobs = new HashMap<JobID,JobInProgress>();
-    jobQueues = new HashMap<String, QueueInfo>();
     this.jobQueueManager = mgr;
-    threadsToQueueMap = new HashMap<String, JobInitializationThread>();
+    threadsToQueueMap = 
+      Collections.synchronizedMap(new HashMap<String, 
+          JobInitializationThread>());
     super.setName("JobInitializationPollerThread");
     running = true;
     this.ttm = ttm;
   }
 
+  void setTaskTrackerManager(TaskTrackerManager ttm) {
+    this.ttm = ttm;
+  }
+  
   /*
    * method to read all configuration values required by the initialisation
    * poller
    */
 
-  void init(Set<String> queues, CapacitySchedulerConf capacityConf) {
-    for (String queue : queues) {
-      int userlimit = capacityConf.getMinimumUserLimitPercent(queue);
-      int maxUsersToInitialize = ((100 / userlimit) + MAX_ADDITIONAL_USERS_TO_INIT);
-      int maxJobsPerUserToInitialize = capacityConf
-          .getMaxJobsPerUserToInitialize(queue);
-      QueueInfo qi = new QueueInfo(queue, maxUsersToInitialize,
-          maxJobsPerUserToInitialize);
-      jobQueues.put(queue, qi);
-    }
+  void init(int numQueues, 
+            CapacitySchedulerConf capacityConf) {
     sleepInterval = capacityConf.getSleepInterval();
-    poolSize = capacityConf.getMaxWorkerThreads();
-    if (poolSize > queues.size()) {
-      poolSize = queues.size();
-    }
+    poolSize = Math.min(capacityConf.getMaxWorkerThreads(), numQueues);
     assignThreadsToQueues();
     Collection<JobInitializationThread> threads = threadsToQueueMap.values();
     for (JobInitializationThread t : threads) {
@@ -288,6 +252,20 @@ public class JobInitializationPoller extends Thread {
     }
   }
 
+  void reinit(Set<String> queues) {
+    Set<String> oldQueues = threadsToQueueMap.keySet();
+    int i=0;
+    JobInitializationThread[] threads = 
+      threadsToQueueMap.values().toArray(new JobInitializationThread[0]);
+    for (String newQueue : queues) {
+      if (!oldQueues.contains(newQueue)) {
+        JobInitializationThread t = threads[i++ % threads.length];
+        t.addQueue(newQueue);
+        threadsToQueueMap.put(newQueue, t);
+      }
+    }
+  }
+  
   /**
    * This is main thread of initialization poller, We essentially do 
    * following in the main threads:
@@ -322,7 +300,7 @@ public class JobInitializationPoller extends Thread {
    * 
    */
   void selectJobsToInitialize() {
-    for (String queue : jobQueues.keySet()) {
+    for (String queue : jobQueueManager.getAllQueues()) {
       ArrayList<JobInProgress> jobsToInitialize = getJobsToInitialize(queue);
       printJobs(jobsToInitialize);
       JobInitializationThread t = threadsToQueueMap.get(queue);
@@ -367,8 +345,9 @@ public class JobInitializationPoller extends Thread {
    * 
    */
   private void assignThreadsToQueues() {
-    int countOfQueues = jobQueues.size();
-    String[] queues = (String[]) jobQueues.keySet().toArray(
+    Collection<String> queueNames = jobQueueManager.getAllQueues();
+    int countOfQueues = queueNames.size();
+    String[] queues = (String[]) queueNames.toArray(
         new String[countOfQueues]);
     int numberOfQueuesPerThread = countOfQueues / poolSize;
     int numberOfQueuesAssigned = 0;
@@ -424,22 +403,17 @@ public class JobInitializationPoller extends Thread {
    * already been initialized. The latter user's initialized jobs are redundant,
    * but we'll leave them initialized.
    * 
-   * @param queue name of the queue to pick the jobs to initialize.
+   * @param queueName name of the queue to pick the jobs to initialize.
    * @return list of jobs to be initalized in a queue. An empty queue is
    *         returned if no jobs are found.
    */
-  ArrayList<JobInProgress> getJobsToInitialize(String queue) {
-    QueueInfo qi = jobQueues.get(queue);
+  ArrayList<JobInProgress> getJobsToInitialize(String queueName) {
+    CapacitySchedulerQueue queue = jobQueueManager.getQueue(queueName);
     ArrayList<JobInProgress> jobsToInitialize = new ArrayList<JobInProgress>();
-    // use the configuration parameter which is configured for the particular
-    // queue.
-    int maximumUsersAllowedToInitialize = qi.maxUsersAllowedToInitialize;
-    int maxJobsPerUserAllowedToInitialize = qi.maxJobsPerUserToInitialize;
-    int maxJobsPerQueueToInitialize = maximumUsersAllowedToInitialize
-        * maxJobsPerUserAllowedToInitialize;
-    int countOfJobsInitialized = 0;
-    HashMap<String, Integer> userJobsInitialized = new HashMap<String, Integer>();
-    Collection<JobInProgress> jobs = jobQueueManager.getWaitingJobs(queue);
+
+    Set<String> usersOverLimit = new HashSet<String>();
+    Collection<JobInProgress> jobs = queue.getWaitingJobs();
+    
     /*
      * Walk through the collection of waiting jobs.
      *  We maintain a map of jobs that have already been initialized. If a 
@@ -455,40 +429,45 @@ public class JobInitializationPoller extends Thread {
      */
     for (JobInProgress job : jobs) {
       String user = job.getProfile().getUser();
-      int numberOfJobs = userJobsInitialized.get(user) == null ? 0
-          : userJobsInitialized.get(user);
-      // If the job is already initialized then add the count against user
-      // then continue.
+      // If the job is already initialized then continue.
       if (initializedJobs.containsKey(job.getJobID())) {
-        userJobsInitialized.put(user, Integer.valueOf(numberOfJobs + 1));
-        countOfJobsInitialized++;
         continue;
       }
-      boolean isUserPresent = userJobsInitialized.containsKey(user);
-      if (!isUserPresent
-          && userJobsInitialized.size() < maximumUsersAllowedToInitialize) {
-        // this is a new user being considered and the number of users
-        // is within limits.
-        userJobsInitialized.put(user, Integer.valueOf(numberOfJobs + 1));
-        jobsToInitialize.add(job);
-        initializedJobs.put(job.getJobID(),job);
-        countOfJobsInitialized++;
-      } else if (isUserPresent
-          && numberOfJobs < maxJobsPerUserAllowedToInitialize) {
-        userJobsInitialized.put(user, Integer.valueOf(numberOfJobs + 1));
-        jobsToInitialize.add(job);
-        initializedJobs.put(job.getJobID(),job);
-        countOfJobsInitialized++;
-      }
-      /*
-       * if the maximum number of jobs to initalize for a queue is reached
-       * then we stop looking at further jobs. The jobs beyond this number
-       * can be initialized.
+
+      /** 
+       * Ensure we will not exceed queue limits
        */
-      if(countOfJobsInitialized > maxJobsPerQueueToInitialize) {
+      if (!queue.initializeJobForQueue(job)) {
         break;
       }
+      
+      
+      /**
+       *  Ensure we will not exceed user limits
+       */
+      
+      // Ensure we don't process a user's jobs out of order 
+      if (usersOverLimit.contains(user)) {
+        continue;
+      }
+      
+      // Check if the user is within limits 
+      if (!queue.initializeJobForUser(job)) {
+        usersOverLimit.add(user);   // Note down the user
+        continue;
+      }
+      
+      // Ready to initialize! 
+      // Double check to ensure that the job has not been killed!
+      if (job.getStatus().getRunState() == JobStatus.PREP) {
+        initializedJobs.put(job.getJobID(), job);
+        jobsToInitialize.add(job);
+
+        // Inform the queue
+        queue.addInitializingJob(job);
+      }
     }
+    
     return jobsToInitialize;
   }
 
@@ -535,7 +514,6 @@ public class JobInitializationPoller extends Thread {
           LOG.info("Removing scheduled jobs from waiting queue"
               + job.getJobID());
           jobsIterator.remove();
-          jobQueueManager.removeJobFromWaitingQueue(job);
           continue;
         }
       }
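
The reworked getJobsToInitialize() in this file replaces the old per-queue user counting with two checks delegated to CapacitySchedulerQueue. Its selection policy can be sketched on its own as below; the Job interface and its methods are hypothetical stand-ins for the real JobInProgress and queue-limit calls, used only to show the control flow.

// Standalone sketch of the selection policy in the new getJobsToInitialize().
// The limit checks are stubbed; the real ones live in
// CapacitySchedulerQueue.initializeJobForQueue()/initializeJobForUser().
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class JobSelectionSketch {
  interface Job {
    String user();
    boolean queueHasRoom();   // queue-wide job/task limits not yet reached
    boolean userHasRoom();    // this user's job/task limits not yet reached
    boolean isPending();      // still in PREP, i.e. not killed meanwhile
  }

  static List<Job> selectJobsToInitialize(Collection<Job> waitingJobs) {
    List<Job> toInitialize = new ArrayList<Job>();
    Set<String> usersOverLimit = new HashSet<String>();
    for (Job job : waitingJobs) {
      if (!job.queueHasRoom()) {
        break;                              // queue limit hit: stop the scan
      }
      if (usersOverLimit.contains(job.user())) {
        continue;                           // keep this user's jobs in order
      }
      if (!job.userHasRoom()) {
        usersOverLimit.add(job.user());     // remember and skip this user
        continue;
      }
      if (job.isPending()) {                // double-check it was not killed
        toInitialize.add(job);
      }
    }
    return toInitialize;
  }
}

The break versus continue distinction matters: hitting the queue limit stops the scan entirely, while a user going over limit only causes that user's remaining jobs to be skipped, so a user's jobs are never initialized out of order.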

+ 55 - 156
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/JobQueuesManager.java

@@ -18,6 +18,7 @@
 package org.apache.hadoop.mapred;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -25,6 +26,7 @@ import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -36,140 +38,32 @@ import org.apache.hadoop.mapred.JobStatusChangeEvent.EventType;
  * one or more queues. 
  */
 class JobQueuesManager extends JobInProgressListener {
-
-  /* 
-   * If a queue supports priorities, jobs must be 
-   * sorted on priorities, and then on their start times (technically, 
-   * their insertion time.  
-   * If a queue doesn't support priorities, jobs are
-   * sorted based on their start time.  
-   */
   
-  // comparator for jobs in queues that don't support priorities
-  private static final Comparator<JobSchedulingInfo> STARTTIME_JOB_COMPARATOR
-    = new Comparator<JobSchedulingInfo>() {
-    public int compare(JobSchedulingInfo o1, JobSchedulingInfo o2) {
-      // the job that started earlier wins
-      if (o1.getStartTime() < o2.getStartTime()) {
-        return -1;
-      } else {
-        return (o1.getStartTime() == o2.getStartTime() 
-                ? o1.getJobID().compareTo(o2.getJobID()) 
-                : 1);
-      }
-    }
-  };
-  
-  // class to store queue info
-  private static class QueueInfo {
-
-    // whether the queue supports priorities
-    boolean supportsPriorities;
-    Map<JobSchedulingInfo, JobInProgress> waitingJobs; // for waiting jobs
-    Map<JobSchedulingInfo, JobInProgress> runningJobs; // for running jobs
-    
-    public Comparator<JobSchedulingInfo> comparator;
-    
-    QueueInfo(boolean prio) {
-      this.supportsPriorities = prio;
-      if (supportsPriorities) {
-        // use the default priority-aware comparator
-        comparator = JobQueueJobInProgressListener.FIFO_JOB_QUEUE_COMPARATOR;
-      }
-      else {
-        comparator = STARTTIME_JOB_COMPARATOR;
-      }
-      waitingJobs = new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
-      runningJobs = new TreeMap<JobSchedulingInfo, JobInProgress>(comparator);
-    }
-    
-    Collection<JobInProgress> getWaitingJobs() {
-      synchronized (waitingJobs) {
-        return Collections.unmodifiableCollection(
-            new LinkedList<JobInProgress>(waitingJobs.values()));
-      }
-    }
-    
-    Collection<JobInProgress> getRunningJobs() {
-      synchronized (runningJobs) {
-       return Collections.unmodifiableCollection(
-           new LinkedList<JobInProgress>(runningJobs.values())); 
-      }
-    }
-    
-    void addRunningJob(JobInProgress job) {
-      synchronized (runningJobs) {
-       runningJobs.put(new JobSchedulingInfo(job),job); 
-      }
-    }
-    
-    JobInProgress removeRunningJob(JobSchedulingInfo jobInfo) {
-      synchronized (runningJobs) {
-        return runningJobs.remove(jobInfo); 
-      }
-    }
-    
-    JobInProgress removeWaitingJob(JobSchedulingInfo schedInfo) {
-      synchronized (waitingJobs) {
-        return waitingJobs.remove(schedInfo);
-      }
-    }
-    
-    void addWaitingJob(JobInProgress job) {
-      synchronized (waitingJobs) {
-        waitingJobs.put(new JobSchedulingInfo(job), job);
-      }
-    }
-    
-    int getWaitingJobCount() {
-      synchronized (waitingJobs) {
-       return waitingJobs.size(); 
-      }
-    }
-    
-  }
-  
-  // we maintain a hashmap of queue-names to queue info
-  private Map<String, QueueInfo> jobQueues = 
-    new HashMap<String, QueueInfo>();
   private static final Log LOG = LogFactory.getLog(JobQueuesManager.class);
   private CapacityTaskScheduler scheduler;
+  // Queues in the system
+  private Collection<String> jobQueueNames;
+  private Map<String, CapacitySchedulerQueue> jobQueues = 
+    new HashMap<String, CapacitySchedulerQueue>();
 
   
   JobQueuesManager(CapacityTaskScheduler s) {
     this.scheduler = s;
   }
   
-  /**
-   * create an empty queue with the default comparator
-   * @param queueName The name of the queue
-   * @param supportsPriotities whether the queue supports priorities
-   */
-  public void createQueue(String queueName, boolean supportsPriotities) {
-    jobQueues.put(queueName, new QueueInfo(supportsPriotities));
-  }
-  
-  /**
-   * Returns the queue of running jobs associated with the name
-   */
-  public Collection<JobInProgress> getRunningJobQueue(String queueName) {
-    return jobQueues.get(queueName).getRunningJobs();
-  }
-  
-  /**
-   * Returns the queue of waiting jobs associated with queue name.
-   * 
-   */
-  Collection<JobInProgress> getWaitingJobs(String queueName) {
-    return jobQueues.get(queueName).getWaitingJobs();
+  void setQueues(Map<String, CapacitySchedulerQueue> queues) {
+    this.jobQueues = queues;
+    this.jobQueueNames = new ArrayList<String>(queues.keySet());
   }
   
   @Override
   public void jobAdded(JobInProgress job) throws IOException {
-    LOG.info("Job submitted to queue " + job.getProfile().getQueueName());
+    LOG.info("Job " + job.getJobID() + " submitted to queue " + 
+        job.getProfile().getQueueName());
+    
     // add job to the right queue
-    QueueInfo qi = jobQueues.get(job.getProfile().getQueueName());
-    if (null == qi) {
+    CapacitySchedulerQueue queue = getQueue(job.getProfile().getQueueName());
+    if (null == queue) {
       // job was submitted to a queue we're not aware of
       LOG.warn("Invalid queue " + job.getProfile().getQueueName() + 
           " specified for job" + job.getProfile().getJobID() + 
@@ -178,7 +72,7 @@ class JobQueuesManager extends JobInProgressListener {
     }
     // add job to waiting queue. It will end up in the right place, 
     // based on priority. 
-    qi.addWaitingJob(job);
+    queue.addWaitingJob(job);
     // let scheduler know. 
     scheduler.jobAdded(job);
   }
@@ -188,15 +82,21 @@ class JobQueuesManager extends JobInProgressListener {
    * job queue manager.
    */
   private void jobCompleted(JobInProgress job, JobSchedulingInfo oldInfo, 
-                            QueueInfo qi) {
+      CapacitySchedulerQueue queue, int runState) {
     LOG.info("Job " + job.getJobID().toString() + " submitted to queue " 
         + job.getProfile().getQueueName() + " has completed");
     //remove jobs from both queue's a job can be in
     //running and waiting queue at the same time.
-    qi.removeRunningJob(oldInfo);
-    qi.removeWaitingJob(oldInfo);
-    // let scheduler know
-    scheduler.jobCompleted(job);
+    JobInProgress waitingJob = queue.removeWaitingJob(oldInfo, runState);
+    JobInProgress initializingJob = 
+      queue.removeInitializingJob(oldInfo, runState);
+    JobInProgress runningJob = queue.removeRunningJob(oldInfo, runState);
+    
+    // let scheduler know if necessary
+    // sometimes this isn't necessary if the job was rejected during submission
+    if (runningJob != null || initializingJob != null || waitingJob != null) {
+      scheduler.jobCompleted(job);
+    }
   }
   
   // Note that job is removed when the job completes i.e in jobUpated()
@@ -206,27 +106,36 @@ class JobQueuesManager extends JobInProgressListener {
   // This is used to reposition a job in the queue. A job can get repositioned 
   // because of the change in the job priority or job start-time.
   private void reorderJobs(JobInProgress job, JobSchedulingInfo oldInfo, 
-                           QueueInfo qi) {
-    
-    if(qi.removeWaitingJob(oldInfo) != null) {
-      qi.addWaitingJob(job);
+      CapacitySchedulerQueue queue, int runState) {
+    if(queue.removeWaitingJob(oldInfo, runState) != null) {
+      try {
+        queue.addWaitingJob(job);
+      } catch (IOException ioe) {
+        // Ignore, cannot happen
+        LOG.warn("Couldn't change priority!");
+        return;
+      }
     }
-    if(qi.removeRunningJob(oldInfo) != null) {
-      qi.addRunningJob(job);
+    if (queue.removeInitializingJob(oldInfo, runState) != null) {
+      queue.addInitializingJob(job);
+    }
+    if(queue.removeRunningJob(oldInfo, runState) != null) {
+      queue.addRunningJob(job);
     }
   }
   
   // This is used to move a job from the waiting queue to the running queue.
   private void makeJobRunning(JobInProgress job, JobSchedulingInfo oldInfo, 
-                              QueueInfo qi) {
+                              CapacitySchedulerQueue queue) {
     // Removing of the job from job list is responsibility of the
     //initialization poller.
     // Add the job to the running queue
-    qi.addRunningJob(job);
+    queue.addRunningJob(job);
   }
   
   // Update the scheduler as job's state has changed
-  private void jobStateChanged(JobStatusChangeEvent event, QueueInfo qi) {
+  private void jobStateChanged(JobStatusChangeEvent event, 
+                               CapacitySchedulerQueue queue) {
     JobInProgress job = event.getJobInProgress();
     JobSchedulingInfo oldJobStateInfo = 
       new JobSchedulingInfo(event.getOldStatus());
@@ -235,16 +144,17 @@ class JobQueuesManager extends JobInProgressListener {
     if (event.getEventType() == EventType.PRIORITY_CHANGED 
         || event.getEventType() == EventType.START_TIME_CHANGED) {
       // Make a priority change
-      reorderJobs(job, oldJobStateInfo, qi);
+      int runState = job.getStatus().getRunState();
+      reorderJobs(job, oldJobStateInfo, queue, runState);
     } else if (event.getEventType() == EventType.RUN_STATE_CHANGED) {
       // Check if the job is complete
       int runState = job.getStatus().getRunState();
       if (runState == JobStatus.SUCCEEDED
           || runState == JobStatus.FAILED
           || runState == JobStatus.KILLED) {
-        jobCompleted(job, oldJobStateInfo, qi);
+        jobCompleted(job, oldJobStateInfo, queue, runState);
       } else if (runState == JobStatus.RUNNING) {
-        makeJobRunning(job, oldJobStateInfo, qi);
+        makeJobRunning(job, oldJobStateInfo, queue);
       }
     }
   }
@@ -252,8 +162,8 @@ class JobQueuesManager extends JobInProgressListener {
   @Override
   public void jobUpdated(JobChangeEvent event) {
     JobInProgress job = event.getJobInProgress();
-    QueueInfo qi = jobQueues.get(job.getProfile().getQueueName());
-    if (null == qi) {
+    CapacitySchedulerQueue queue = getQueue(job.getProfile().getQueueName());
+    if (null == queue) {
       // can't find queue for job. Shouldn't happen. 
       LOG.warn("Could not find queue " + job.getProfile().getQueueName() + 
           " when updating job " + job.getProfile().getJobID());
@@ -262,26 +172,15 @@ class JobQueuesManager extends JobInProgressListener {
     
     // Check if this is the status change
     if (event instanceof JobStatusChangeEvent) {
-      jobStateChanged((JobStatusChangeEvent)event, qi);
+      jobStateChanged((JobStatusChangeEvent)event, queue);
     }
   }
   
-  void removeJobFromWaitingQueue(JobInProgress job) {
-    String queue = job.getProfile().getQueueName();
-    QueueInfo qi = jobQueues.get(queue);
-    qi.removeWaitingJob(new JobSchedulingInfo(job));
-  }
-  
-  Comparator<JobSchedulingInfo> getComparator(String queue) {
-    return jobQueues.get(queue).comparator;
+  CapacitySchedulerQueue getQueue(String queue) {
+    return jobQueues.get(queue);
   }
   
-  int getWaitingJobCount(String queue) {
-    QueueInfo qi = jobQueues.get(queue);
-    return qi.getWaitingJobCount();
-  }
-
-  boolean doesQueueSupportPriorities(String queueName) {
-    return jobQueues.get(queueName).supportsPriorities;
+  Collection<String> getAllQueues() {
+    return Collections.unmodifiableCollection(jobQueueNames);
   }
 }

+ 68 - 75
src/contrib/capacity-scheduler/src/java/org/apache/hadoop/mapred/MemoryMatcher.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.mapred;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.TaskType;
 
 class MemoryMatcher {
 
@@ -44,131 +45,123 @@ class MemoryMatcher {
     return true;
   }
 
+  
   /**
    * Find the memory that is already used by all the running tasks
    * residing on the given TaskTracker.
    * 
    * @param taskTracker
    * @param taskType 
+   * @param availableSlots
    * @return amount of memory that is used by the residing tasks,
    *          null if memory cannot be computed for some reason.
    */
-  synchronized Long getMemReservedForTasks(
-      TaskTrackerStatus taskTracker, CapacityTaskScheduler.TYPE taskType) {
+  synchronized long getMemReservedForTasks(
+      TaskTrackerStatus taskTracker, TaskType taskType, int availableSlots) {
+    int currentlyScheduled = 
+      currentlyScheduled(taskTracker, taskType, availableSlots);
     long vmem = 0;
 
     for (TaskStatus task : taskTracker.getTaskReports()) {
       // the following task states are one in which the slot is
       // still occupied and hence memory of the task should be
       // accounted in used memory.
-      if ((task.getRunState() == TaskStatus.State.RUNNING)
-          || (task.getRunState() == TaskStatus.State.COMMIT_PENDING)) {
-        JobInProgress job =
-            scheduler.taskTrackerManager.getJob(task.getTaskID().getJobID());
-        if (job == null) {
-          // This scenario can happen if a job was completed/killed
-          // and retired from JT's memory. In this state, we can ignore
-          // the running task status and compute memory for the rest of
-          // the tasks. However, any scheduling done with this computation
-          // could result in over-subscribing of memory for tasks on this
-          // TT (as the unaccounted for task is still running).
-          // So, it is safer to not schedule anything for this TT
-          // One of the ways of doing that is to return null from here
-          // and check for null in the calling method.
-          LOG.info("Task tracker: " + taskTracker.getHost() + " is reporting "
-              + "a running / commit pending task: " + task.getTaskID()
-              + " but no corresponding job was found. "
-              + "Maybe job was retired. Not computing "
-              + "memory values for this TT.");
-          return null;
-        }
-
-        JobConf jConf = job.getJobConf();
-
-        // Get the memory "allotted" for this task by rounding off the job's
-        // tasks' memory limits to the nearest multiple of the slot-memory-size
-        // set on JT. This essentially translates to tasks of a high memory job
-        // using multiple slots.
+      if ((task.getRunState() == TaskStatus.State.RUNNING) ||
+          (task.getRunState() == TaskStatus.State.UNASSIGNED) ||
+          (task.inTaskCleanupPhase())) {
+        // Get the memory "allotted" for this task based on number of slots
         long myVmem = 0;
-        if (task.getIsMap() && taskType.equals(CapacityTaskScheduler.TYPE.MAP)) {
-          myVmem = jConf.getMemoryForMapTask();
-          myVmem =
-              (long) (scheduler.getMemSizeForMapSlot() * Math
-                  .ceil((float) myVmem
-                      / (float) scheduler.getMemSizeForMapSlot()));
+        if (task.getIsMap() && taskType == TaskType.MAP) {
+          long memSizePerMapSlot = scheduler.getMemSizeForMapSlot(); 
+          myVmem = 
+            memSizePerMapSlot * task.getNumSlots();
         } else if (!task.getIsMap()
-            && taskType.equals(CapacityTaskScheduler.TYPE.REDUCE)) {
-          myVmem = jConf.getMemoryForReduceTask();
-          myVmem =
-              (long) (scheduler.getMemSizeForReduceSlot() * Math
-                  .ceil((float) myVmem
-                      / (float) scheduler.getMemSizeForReduceSlot()));
+            && taskType == TaskType.REDUCE) {
+          long memSizePerReduceSlot = scheduler.getMemSizeForReduceSlot(); 
+          myVmem = memSizePerReduceSlot * task.getNumSlots();
         }
         vmem += myVmem;
       }
     }
 
-    return Long.valueOf(vmem);
+    long currentlyScheduledVMem = 
+      currentlyScheduled * ((taskType == TaskType.MAP) ? 
+          scheduler.getMemSizeForMapSlot() : 
+            scheduler.getMemSizeForReduceSlot());
+    return vmem + currentlyScheduledVMem; 
   }
 
+  private int currentlyScheduled(TaskTrackerStatus taskTracker, 
+                                 TaskType taskType, int availableSlots) {
+    int scheduled = 0;
+    if (taskType == TaskType.MAP) {
+      scheduled = 
+        (taskTracker.getMaxMapSlots() - taskTracker.countOccupiedMapSlots()) - 
+            availableSlots;
+    } else {
+      scheduled = 
+        (taskTracker.getMaxReduceSlots() - 
+            taskTracker.countOccupiedReduceSlots()) - availableSlots;
+    }
+    return scheduled;
+  }
   /**
    * Check if a TT has enough memory to run of task specified from this job.
    * @param job
    * @param taskType 
    * @param taskTracker
+   * @param availableSlots
    * @return true if this TT has enough memory for this job. False otherwise.
    */
-  boolean matchesMemoryRequirements(JobInProgress job,
-      CapacityTaskScheduler.TYPE taskType, TaskTrackerStatus taskTracker) {
+  boolean matchesMemoryRequirements(JobInProgress job,TaskType taskType, 
+                                    TaskTrackerStatus taskTracker, 
+                                    int availableSlots) {
 
-    LOG.debug("Matching memory requirements of " + job.getJobID().toString()
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Matching memory requirements of " + job.getJobID().toString()
         + " for scheduling on " + taskTracker.trackerName);
+    }
 
     if (!isSchedulingBasedOnMemEnabled()) {
-      LOG.debug("Scheduling based on job's memory requirements is disabled."
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Scheduling based on job's memory requirements is disabled."
           + " Ignoring any value set by job.");
+      }
       return true;
     }
 
-    Long memUsedOnTT = getMemReservedForTasks(taskTracker, taskType);
-    if (memUsedOnTT == null) {
-      // For some reason, maybe because we could not find the job
-      // corresponding to a running task (as can happen if the job
-      // is retired in between), we could not compute the memory state
-      // on this TT. Treat this as an error, and fail memory
-      // requirements.
-      LOG.info("Could not compute memory for taskTracker: "
-          + taskTracker.getHost() + ". Failing memory requirements.");
-      return false;
-    }
-
+    long memUsedOnTT = 
+      getMemReservedForTasks(taskTracker, taskType, availableSlots);
     long totalMemUsableOnTT = 0;
-
     long memForThisTask = 0;
-    if (taskType.equals(CapacityTaskScheduler.TYPE.MAP)) {
-      memForThisTask = job.getJobConf().getMemoryForMapTask();
+    if (taskType == TaskType.MAP) {
+      memForThisTask = job.getMemoryForMapTask();
       totalMemUsableOnTT =
-          scheduler.getMemSizeForMapSlot() * taskTracker.getMaxMapTasks();
-    } else if (taskType.equals(CapacityTaskScheduler.TYPE.REDUCE)) {
-      memForThisTask = job.getJobConf().getMemoryForReduceTask();
+          scheduler.getMemSizeForMapSlot() * taskTracker.getMaxMapSlots();
+    } else if (taskType == TaskType.REDUCE) {
+      memForThisTask = job.getMemoryForReduceTask();
       totalMemUsableOnTT =
           scheduler.getMemSizeForReduceSlot()
-              * taskTracker.getMaxReduceTasks();
+              * taskTracker.getMaxReduceSlots();
     }
 
-    long freeMemOnTT = totalMemUsableOnTT - memUsedOnTT.longValue();
+    long freeMemOnTT = totalMemUsableOnTT - memUsedOnTT;
     if (memForThisTask > freeMemOnTT) {
-      LOG.debug("memForThisTask (" + memForThisTask + ") > freeMemOnTT ("
-          + freeMemOnTT + "). A " + taskType + " task from "
-          + job.getJobID().toString() + " cannot be scheduled on TT "
-          + taskTracker.trackerName);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("memForThisTask (" + memForThisTask + ") > freeMemOnTT ("
+                  + freeMemOnTT + "). A " + taskType + " task from "
+                  + job.getJobID().toString() + " cannot be scheduled on TT "
+                  + taskTracker.trackerName);
+      }
       return false;
     }
 
-    LOG.debug("memForThisTask = " + memForThisTask + ". freeMemOnTT = "
-        + freeMemOnTT + ". A " + taskType.toString() + " task from "
-        + job.getJobID().toString() + " matches memory requirements on TT "
-        + taskTracker.trackerName);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("memForThisTask = " + memForThisTask + ". freeMemOnTT = "
+                + freeMemOnTT + ". A " + taskType.toString() + " task from "
+                + job.getJobID().toString() + " matches memory requirements "
+                + "on TT "+ taskTracker.trackerName);
+    }
     return true;
   }
 }
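
As a worked example of the check in matchesMemoryRequirements() above, consider a hypothetical tasktracker with four 1024 MB map slots, two of them occupied by single-slot tasks, and a job whose map tasks ask for 2048 MB; all numbers below are made up for illustration.

// Worked sketch of the free-memory check; illustrative numbers only.
public class MemoryCheckSketch {
  public static void main(String[] args) {
    long memSizePerMapSlot = 1024;   // MB configured per map slot on the TT
    int maxMapSlots = 4;
    int occupiedSlots = 2;           // slots held by running/unassigned tasks
    long memForThisTask = 2048;      // the job's per-map-task memory request

    long totalMemUsableOnTT = memSizePerMapSlot * maxMapSlots;   // 4096 MB
    long memUsedOnTT = memSizePerMapSlot * occupiedSlots;        // 2048 MB
    long freeMemOnTT = totalMemUsableOnTT - memUsedOnTT;         // 2048 MB

    // 2048 <= 2048, so the task fits (it will implicitly occupy two slots)
    System.out.println(memForThisTask <= freeMemOnTT);           // true
  }
}

Because memory is now accounted per occupied slot (task.getNumSlots() times the slot size) rather than per rounded-up job request, the matcher no longer needs the old "return null when the job has been retired" path that this patch removes.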

+ 8 - 1
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/ClusterWithCapacityScheduler.java

@@ -146,7 +146,7 @@ public class ClusterWithCapacityScheduler extends TestCase {
   }
 
   protected JobConf getJobConf() {
-    return this.jobConf;
+    return new JobConf(this.jobConf);
   }
 
   protected JobTracker getJobTracker() {
@@ -197,6 +197,13 @@ public class ClusterWithCapacityScheduler extends TestCase {
     }
   }
 
+  /**
+   * @return the mrCluster
+   */
+  public MiniMRCluster getMrCluster() {
+    return mrCluster;
+  }
+
   static class MyClassLoader extends ClassLoader {
     @Override
     public URL getResource(String name) {

File diff content is too large to display
+ 523 - 139
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacityScheduler.java


+ 20 - 0
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerConf.java

@@ -278,6 +278,26 @@ public class TestCapacitySchedulerConf extends TestCase {
     }
   }
   
+  public void testInvalidMaxCapacity() throws IOException {
+    openFile();
+    startConfig();
+    writeProperty(
+      "mapred.capacity-scheduler.queue.default.capacity", "70");
+    writeProperty(
+      "mapred.capacity-scheduler.queue.default.maximum-capacity", "50");
+    endConfig();
+    testConf = new CapacitySchedulerConf(new Path(testConfFile));
+
+    try {
+      testConf.getMaxCapacity("default");
+      fail(" getMaxCapacity worked " + testConf.getCapacity("default"));
+    } catch (IllegalArgumentException e) {
+      assertEquals(
+        CapacitySchedulerConf.MAX_CAPACITY_PROPERTY + " 50.0"+
+          " for a queue should be greater than or equal to capacity ", e.getMessage());
+    }
+  }
+  
   public void testInitializationPollerProperties() 
     throws Exception {
     /*

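The new testInvalidMaxCapacity case pins down the constraint that a queue's maximum-capacity must be at least its capacity (70 vs. 50 above). A small self-contained sketch of that constraint follows; the check method is an assumption written only to mirror the expected IllegalArgumentException, not CapacitySchedulerConf's actual implementation.

    // Sketch only: mirrors the constraint exercised by testInvalidMaxCapacity.
    // The validation below is an assumption for illustration, not CapacitySchedulerConf code.
    public class MaxCapacitySketch {
      static void checkMaxCapacity(String queue, float capacity, float maxCapacity) {
        if (maxCapacity < capacity) {
          throw new IllegalArgumentException(
              "maximum-capacity " + maxCapacity + " for queue " + queue
              + " should be greater than or equal to capacity " + capacity);
        }
      }

      public static void main(String[] args) {
        checkMaxCapacity("default", 70f, 50f); // throws: 50 < 70, as in the test above
      }
    }
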
+ 72 - 0
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerServlet.java

@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+import java.net.URL;
+
+public class TestCapacitySchedulerServlet extends
+    ClusterWithCapacityScheduler {
+
+  /**
+   * Test case checks CapacitySchedulerServlet. Check if queues are 
+   * initialized {@link CapacityTaskScheduler} 
+   * 
+   * @throws IOException
+   */
+  public void testCapacitySchedulerServlet() throws IOException {
+    Properties schedulerProps = new Properties();
+    String[] queues = new String[] { "Q1", "Q2" };
+    for (String q : queues) {
+      schedulerProps.put(CapacitySchedulerConf
+          .toFullPropertyName(q, "capacity"), "50");
+      schedulerProps.put(CapacitySchedulerConf.toFullPropertyName(q,
+          "minimum-user-limit-percent"), "100");
+    }
+    Properties clusterProps = new Properties();
+    clusterProps.put("mapred.tasktracker.map.tasks.maximum", String.valueOf(2));
+    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
+        .valueOf(2));
+    clusterProps.put("mapred.queue.names", queues[0] + "," + queues[1]);
+    startCluster(2, clusterProps, schedulerProps);
+
+    JobTracker jt = getJobTracker();
+    int port = jt.getInfoPort();
+    String host = jt.getJobTrackerMachine();
+    URL url = new URL("http://" + host + ":" + port + "/scheduler");
+    String queueData = readOutput(url);
+    assertTrue(queueData.contains("Q1"));
+    assertTrue(queueData.contains("Q2"));
+    assertTrue(queueData.contains("50.0%"));
+  }
+
+  private String readOutput(URL url) throws IOException {
+    StringBuilder out = new StringBuilder();
+    InputStream in = url.openConnection().getInputStream();
+    byte[] buffer = new byte[64 * 1024];
+    int len = in.read(buffer);
+    while (len > 0) {
+      out.append(new String(buffer, 0, len));
+      len = in.read(buffer);
+    }
+    return out.toString();
+  }
+}

+ 129 - 0
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestCapacitySchedulerWithJobTracker.java

@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.examples.SleepJob;
+
+
+public class TestCapacitySchedulerWithJobTracker extends
+    ClusterWithCapacityScheduler {
+
+  /**
+   * Test case which checks if the jobs which fail initialization are removed
+   * from the {@link CapacityTaskScheduler} waiting queue.
+   * 
+   * @throws Exception
+   */
+  public void testFailingJobInitalization() throws Exception {
+    Properties schedulerProps = new Properties();
+    schedulerProps.put("mapred.capacity-scheduler.queue.default.capacity",
+        "100");
+    Properties clusterProps = new Properties();
+    clusterProps.put("mapred.tasktracker.map.tasks.maximum", String.valueOf(1));
+    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
+        .valueOf(1));
+    clusterProps.put("mapred.jobtracker.maxtasks.per.job", String.valueOf(1));
+    // cluster capacity 1 maps, 1 reduces
+    startCluster(1, clusterProps, schedulerProps);
+    JobConf conf = getJobConf();
+    conf.setSpeculativeExecution(false);
+    conf.set("mapred.committer.job.setup.cleanup.needed", "false");
+    conf.setNumTasksToExecutePerJvm(-1);
+    SleepJob sleepJob = new SleepJob();
+    sleepJob.setConf(conf);
+    JobConf job = sleepJob.setupJobConf(3, 3, 1, 1, 1, 1);
+    RunningJob rjob;
+    try {
+      rjob = runJob(job, false);
+      fail("The job should have thrown Exception");
+    } catch (Exception e) {
+      CapacityTaskScheduler scheduler = (CapacityTaskScheduler) getJobTracker()
+          .getTaskScheduler();
+      JobQueuesManager mgr = scheduler.jobQueuesManager;
+      assertEquals("Failed job present in Waiting queue", 0, mgr
+          .getQueue("default").getNumWaitingJobs());
+    }
+  }
+
+  /**
+   * Test case which checks {@link JobTracker} and {@link CapacityTaskScheduler}
+   * 
+   * Test case submits 2 jobs in two different capacity scheduler queues. And
+   * checks if the jobs successfully complete.
+   * 
+   * @throws Exception
+   */
+  public void testJobTrackerIntegration() throws Exception {
+
+    Properties schedulerProps = new Properties();
+    String[] queues = new String[] { "Q1", "Q2" };
+    RunningJob jobs[] = new RunningJob[2];
+    for (String q : queues) {
+      schedulerProps.put(CapacitySchedulerConf
+          .toFullPropertyName(q, "capacity"), "50");
+      schedulerProps.put(CapacitySchedulerConf.toFullPropertyName(q,
+          "minimum-user-limit-percent"), "100");
+    }
+
+    Properties clusterProps = new Properties();
+    clusterProps.put("mapred.tasktracker.map.tasks.maximum", String.valueOf(2));
+    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
+        .valueOf(2));
+    clusterProps.put("mapred.queue.names", queues[0] + "," + queues[1]);
+    startCluster(2, clusterProps, schedulerProps);
+
+    JobConf conf = getJobConf();
+    conf.setSpeculativeExecution(false);
+    conf.set("mapred.committer.job.setup.cleanup.needed", "false");
+    conf.setNumTasksToExecutePerJvm(-1);
+    conf.setQueueName(queues[0]);
+    SleepJob sleepJob1 = new SleepJob();
+    sleepJob1.setConf(conf);
+    JobConf sleepJobConf = sleepJob1.setupJobConf(1, 1, 1, 1, 1, 1);
+    jobs[0] = runJob(sleepJobConf, true);
+
+    JobConf conf2 = getJobConf();
+    conf2.setSpeculativeExecution(false);
+    conf2.set("mapred.committer.job.setup.cleanup.needed", "false");
+    conf2.setNumTasksToExecutePerJvm(-1);
+    conf2.setQueueName(queues[1]);
+    SleepJob sleepJob2 = new SleepJob();
+    sleepJob2.setConf(conf2);
+    JobConf sleep2 = sleepJob2.setupJobConf(3, 3, 5, 3, 5, 3);
+    jobs[1] = runJob(sleep2, false);
+    assertTrue("Sleep job submitted to queue 1 is not successful", jobs[0]
+        .isSuccessful());
+    assertTrue("Sleep job submitted to queue 2 is not successful", jobs[1]
+        .isSuccessful());
+  }
+
+  private RunningJob runJob(JobConf conf, boolean inBackGround)
+      throws IOException {
+    if (!inBackGround) {
+      RunningJob rjob = JobClient.runJob(conf);
+      return rjob;
+    } else {
+      RunningJob rJob = new JobClient(conf).submitJob(conf);
+      return rJob;
+    }
+  }
+}

+ 0 - 57
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestJobInitialization.java

@@ -1,57 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.mapred;
-
-import java.util.Properties;
-import org.apache.hadoop.mapred.ControlledMapReduceJob.ControlledMapReduceJobRunner;
-
-public class TestJobInitialization extends ClusterWithCapacityScheduler {
- 
-  public void testFailingJobInitalization() throws Exception {
-    Properties schedulerProps = new Properties();
-    schedulerProps.put(
-        "mapred.capacity-scheduler.queue.default.capacity", "100");
-    Properties clusterProps = new Properties();
-    clusterProps
-        .put("mapred.tasktracker.map.tasks.maximum", String.valueOf(1));
-    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
-        .valueOf(1));
-    clusterProps.put("mapred.jobtracker.maxtasks.per.job", String
-        .valueOf(1));
-    // cluster capacity 1 maps, 1 reduces
-    startCluster(1, clusterProps, schedulerProps);
-    ControlledMapReduceJobRunner jobRunner =
-      ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-          getJobConf(), 3, 3);
-    jobRunner.start();
-    JobID myJobID = jobRunner.getJobID();
-    JobInProgress myJob = getJobTracker().getJob(myJobID);
-    while(!myJob.isComplete()) {
-      Thread.sleep(1000);
-    }
-    assertTrue("The submitted job successfully completed", 
-        myJob.status.getRunState() == JobStatus.FAILED);
-    CapacityTaskScheduler scheduler = (CapacityTaskScheduler) getJobTracker().getTaskScheduler();
-    JobQueuesManager mgr = scheduler.jobQueuesManager;
-    assertEquals("Failed job present in Waiting queue", 
-        0, mgr.getWaitingJobCount("default"));
-    assertFalse("Failed job present in Waiting queue", 
-        mgr.getWaitingJobs("default").contains(myJob));
-  }
-}

+ 85 - 0
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithCS.java

@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.util.Properties;
+import org.apache.hadoop.mapred.ControlledMapReduceJob.ControlledMapReduceJobRunner;
+import org.junit.*;
+
+/**UNTIL MAPREDUCE-873 is backported, we will not run recovery manager tests
+ */
+@Ignore
+public class TestJobTrackerRestartWithCS extends ClusterWithCapacityScheduler {
+
+  /**
+   * Test single queue.
+   *
+   * <p>
+   *
+   * Submit a job with more M/R tasks than total capacity. Full queue capacity
+   * should be utilized and remaining M/R tasks should wait for slots to be
+   * available.
+   *
+   * @throws Exception
+   */
+  public void testJobTrackerRestartWithCS()
+          throws Exception {
+    try {
+      Properties schedulerProps = new Properties();
+      schedulerProps.put(
+              "mapred.capacity-scheduler.queue.default.guaranteed-capacity", "100");
+      Properties clusterProps = new Properties();
+      clusterProps.put("mapred.tasktracker.map.tasks.maximum", String.valueOf(2));
+      clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String.valueOf(0));
+
+      // cluster capacity 2 maps, 0 reduces
+      startCluster(1, clusterProps, schedulerProps);
+
+      ControlledMapReduceJobRunner jobRunner =
+              ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
+              getJobConf(), 4, 0);
+      jobRunner.start();
+      ControlledMapReduceJob controlledJob = jobRunner.getJob();
+      JobID myJobID = jobRunner.getJobID();
+      JobInProgress myJob = getJobTracker().getJob(myJobID);
+      ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 2);
+
+      LOG.info("Trying to finish 2 maps");
+      controlledJob.finishNTasks(true, 2);
+      ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 2);
+      assertTrue("Number of maps finished", myJob.finishedMaps() == 2);
+
+      JobClient jobClient = new JobClient(getMrCluster().createJobConf());
+      getMrCluster().stopJobTracker();
+
+      getMrCluster().getJobTrackerConf().setBoolean("mapred.jobtracker.restart.recover",
+              true);
+      getMrCluster().startJobTracker();
+
+      UtilsForTests.waitForJobTracker(jobClient);
+      ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 1);
+
+      controlledJob.finishNTasks(true, 2);
+      ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 2);
+    } catch (Exception e) {
+      e.printStackTrace();
+    } finally {
+      tearDown();
+    }
+  }
+}

+ 0 - 440
src/contrib/capacity-scheduler/src/test/org/apache/hadoop/mapred/TestQueueCapacities.java

@@ -1,440 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.mapred;
-
-import java.util.Properties;
-import org.apache.hadoop.mapred.ControlledMapReduceJob.ControlledMapReduceJobRunner;
-
-/**
- * End to end tests based on MiniMRCluster to verify that queue capacities are
- * honored. Automates the tests related to queue capacities: submits jobs to
- * different queues simultaneously and ensures that capacities are honored
- */
-public class TestQueueCapacities extends ClusterWithCapacityScheduler {
-
-  /**
-   * Test single queue.
-   * 
-   * <p>
-   * 
-   * Submit a job with more M/R tasks than total capacity. Full queue capacity
-   * should be utilized and remaining M/R tasks should wait for slots to be
-   * available.
-   * 
-   * @throws Exception
-   */
-  public void testSingleQueue()
-      throws Exception {
-
-    Properties schedulerProps = new Properties();
-    schedulerProps.put(
-        "mapred.capacity-scheduler.queue.default.guaranteed-capacity", "100");
-    Properties clusterProps = new Properties();
-    clusterProps
-        .put("mapred.tasktracker.map.tasks.maximum", String.valueOf(3));
-    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
-        .valueOf(3));
-    // cluster capacity 12 maps, 12 reduces
-    startCluster(4, clusterProps, schedulerProps);
-
-    ControlledMapReduceJobRunner jobRunner =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-            getJobConf(), 16, 16);
-    jobRunner.start();
-    ControlledMapReduceJob controlledJob = jobRunner.getJob();
-    JobID myJobID = jobRunner.getJobID();
-    JobInProgress myJob = getJobTracker().getJob(myJobID);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 12);
-
-    // Wait till the cluster reaches steady state. This confirms that the rest
-    // of the tasks are not running and waiting for slots
-    // to be freed.
-    waitTillAllSlotsAreOccupied(true);
-
-    LOG.info("Trying to finish 2 maps");
-    controlledJob.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 2);
-    assertTrue("Number of maps finished", myJob.finishedMaps() == 2);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 12);
-    waitTillAllSlotsAreOccupied(true);
-
-    LOG.info("Trying to finish 2 more maps");
-    controlledJob.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 4);
-    assertTrue("Number of maps finished", myJob.finishedMaps() == 4);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 12);
-    waitTillAllSlotsAreOccupied(true);
-
-    LOG.info("Trying to finish the last 12 maps");
-    controlledJob.finishNTasks(true, 12);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 16);
-    assertTrue("Number of maps finished", myJob.finishedMaps() == 16);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 0);
-    ControlledMapReduceJob.haveAllTasksFinished(myJob, true);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, false, 12);
-    waitTillAllSlotsAreOccupied(false);
-
-    LOG.info("Trying to finish 4 reduces");
-    controlledJob.finishNTasks(false, 4);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, false, 4);
-    assertTrue("Number of reduces finished", myJob.finishedReduces() == 4);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, false, 12);
-    waitTillAllSlotsAreOccupied(false);
-
-    LOG.info("Trying to finish the last 12 reduces");
-    controlledJob.finishNTasks(false, 12);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, false, 16);
-    assertTrue("Number of reduces finished", myJob.finishedReduces() == 16);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, false, 0);
-    ControlledMapReduceJob.haveAllTasksFinished(myJob, false);
-
-    jobRunner.join();
-  }
-
-  /**
-   * Test single queue with multiple jobs.
-   * 
-   * @throws Exception
-   */
-  public void testSingleQueueMultipleJobs()
-      throws Exception {
-
-    Properties schedulerProps = new Properties();
-    schedulerProps.put(
-        "mapred.capacity-scheduler.queue.default.guaranteed-capacity", "100");
-    Properties clusterProps = new Properties();
-    clusterProps
-        .put("mapred.tasktracker.map.tasks.maximum", String.valueOf(3));
-    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
-        .valueOf(0));
-    // cluster capacity 12 maps, 0 reduces
-    startCluster(4, clusterProps, schedulerProps);
-
-    singleQMultipleJobs1();
-    singleQMultipleJobs2();
-  }
-
-  /**
-   * Test multiple queues.
-   * 
-   * These tests use 4 queues default, Q2, Q3 and Q4 with guaranteed capacities
-   * 10, 20, 30, 40 respectively), user limit 100%, priority not respected, one
-   * user per queue. Reclaim time 5 minutes.
-   * 
-   * @throws Exception
-   */
-  public void testMultipleQueues()
-      throws Exception {
-    Properties schedulerProps = new Properties();
-    String[] queues = new String[] { "default", "Q2", "Q3", "Q4" };
-    int GC = 0;
-    for (String q : queues) {
-      GC += 10;
-      schedulerProps.put(CapacitySchedulerConf.toFullPropertyName(q,
-          "guaranteed-capacity"), String.valueOf(GC)); // TODO: use strings
-      schedulerProps.put(CapacitySchedulerConf.toFullPropertyName(q,
-          "minimum-user-limit-percent"), String.valueOf(100));
-      schedulerProps.put(CapacitySchedulerConf.toFullPropertyName(q,
-          "reclaim-time-limit"), String.valueOf(300));
-    }
-
-    Properties clusterProps = new Properties();
-    clusterProps
-        .put("mapred.tasktracker.map.tasks.maximum", String.valueOf(2));
-    clusterProps.put("mapred.tasktracker.reduce.tasks.maximum", String
-        .valueOf(2));
-    clusterProps.put("mapred.queue.names", queues[0] + "," + queues[1] + ","
-        + queues[2] + "," + queues[3]);
-
-    // cluster capacity 10 maps, 10 reduces and 4 queues with capacities 1, 2,
-    // 3, 4 respectively.
-    startCluster(5, clusterProps, schedulerProps);
-
-    multipleQsWithOneQBeyondCapacity(queues);
-    multipleQueuesWithinCapacities(queues);
-  }
-
-  /**
-   * Submit a job with more M/R tasks than total queue capacity and then submit
-   * another job. First job utilizes all the slots. When the second job is
-   * submitted, the tasks of the second job wait for slots to be available. As
-   * the tasks of the first jobs finish and there are no more tasks pending, the
-   * tasks of the second job start running on the freed up slots.
-   * 
-   * @throws Exception
-   */
-  private void singleQMultipleJobs1()
-      throws Exception {
-
-    ControlledMapReduceJobRunner jobRunner1 =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-            getJobConf(), 16, 0);
-    ControlledMapReduceJobRunner jobRunner2 =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-            getJobConf(), 12, 0);
-    jobRunner1.start();
-    ControlledMapReduceJob controlledJob1 = jobRunner1.getJob();
-    JobID jobID1 = jobRunner1.getJobID();
-    JobInProgress jip1 = getJobTracker().getJob(jobID1);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 12);
-
-    // Confirm that the rest of the tasks are not running and waiting for slots
-    // to be freed.
-    waitTillAllSlotsAreOccupied(true);
-
-    // Now start the second job.
-    jobRunner2.start();
-    JobID jobID2 = jobRunner2.getJobID();
-    ControlledMapReduceJob controlledJob2 = jobRunner2.getJob();
-    JobInProgress jip2 = getJobTracker().getJob(jobID2);
-
-    LOG.info("Trying to finish 2 map");
-    controlledJob1.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 2);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 2);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 12);
-    waitTillAllSlotsAreOccupied(true);
-
-    LOG.info("Trying to finish 2 more maps");
-    controlledJob1.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 4);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 4);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 12);
-    waitTillAllSlotsAreOccupied(true);
-
-    // All tasks of Job1 started running/finished. Now job2 should start
-    LOG.info("Trying to finish 2 more maps");
-    controlledJob1.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 6);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 6);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 10);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip2, true, 2);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 10);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 2);
-
-    LOG.info("Trying to finish 10 more maps and hence job1");
-    controlledJob1.finishNTasks(true, 10);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 16);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 16);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip2, true, 12);
-    controlledJob1.finishJob();
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 0);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 12);
-
-    // Finish job2 also
-    controlledJob2.finishJob();
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip2, true, 12);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 0);
-
-    jobRunner1.join();
-    jobRunner2.join();
-  }
-
-  /**
-   * Submit a job with less M/R tasks than total capacity and another job with
-   * more M/R tasks than the remaining capacity. First job should utilize the
-   * required slots and other job should utilize the available slots and its
-   * remaining tasks wait for slots to become free.
-   * 
-   * @throws Exception
-   */
-  private void singleQMultipleJobs2()
-      throws Exception {
-
-    ControlledMapReduceJobRunner jobRunner1 =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-            getJobConf(), 8, 0);
-    ControlledMapReduceJobRunner jobRunner2 =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-            getJobConf(), 12, 0);
-    jobRunner1.start();
-    ControlledMapReduceJob controlledJob1 = jobRunner1.getJob();
-    JobID jobID1 = jobRunner1.getJobID();
-    JobInProgress jip1 = getJobTracker().getJob(jobID1);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 8);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 8);
-
-    // Now start the second job.
-    jobRunner2.start();
-    JobID jobID2 = jobRunner2.getJobID();
-    ControlledMapReduceJob controlledJob2 = jobRunner2.getJob();
-    JobInProgress jip2 = getJobTracker().getJob(jobID2);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip2, true, 4);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 8);
-    // The rest of the tasks of job2 should wait.
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 4);
-
-    LOG.info("Trying to finish 2 maps of job1");
-    controlledJob1.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 2);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 2);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip1, true, 6);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip2, true, 6);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 6);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 6);
-
-    LOG.info("Trying to finish 6 more maps of job1");
-    controlledJob1.finishNTasks(true, 6);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip1, true, 8);
-    assertTrue("Number of maps finished", jip1.finishedMaps() == 8);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(jip2, true, 12);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(jip1, true, 0);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 12);
-
-    // Finish job2 also
-    controlledJob2.finishJob();
-
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(jip2, true, 12);
-    ControlledMapReduceJob.assertNumTasksRunning(jip2, true, 0);
-
-    jobRunner1.join();
-    jobRunner2.join();
-  }
-
-  /**
-   * Test to verify running of tasks in a queue going over its capacity. In
-   * queue default, user U1 starts a job J1, having more M/R tasks than the
-   * total slots. M/R tasks of job J1 should start running on all the nodes (100
-   * % utilization).
-   * 
-   * @throws Exception
-   */
-  private void multipleQsWithOneQBeyondCapacity(String[] queues)
-      throws Exception {
-
-    JobConf conf = getJobConf();
-    conf.setQueueName(queues[0]);
-    conf.setUser("U1");
-    ControlledMapReduceJobRunner jobRunner =
-        ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(conf, 15,
-            0);
-    jobRunner.start();
-    ControlledMapReduceJob controlledJob = jobRunner.getJob();
-    JobID myJobID = jobRunner.getJobID();
-    JobInProgress myJob = getJobTracker().getJob(myJobID);
-
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 10);
-
-    // Confirm that the rest of the tasks are not running and waiting for slots
-    // to be freed.
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(myJob, true, 10);
-
-    LOG.info("Trying to finish 3 maps");
-    controlledJob.finishNTasks(true, 3);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 3);
-    assertTrue("Number of maps finished", myJob.finishedMaps() == 3);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 10);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(myJob, true, 10);
-
-    LOG.info("Trying to finish 2 more maps");
-    controlledJob.finishNTasks(true, 2);
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 5);
-    assertTrue("Number of maps finished", myJob.finishedMaps() == 5);
-    ControlledMapReduceJob.waitTillNTasksStartRunning(myJob, true, 10);
-    waitTillAllSlotsAreOccupied(true);
-    ControlledMapReduceJob.assertNumTasksRunning(myJob, true, 10);
-
-    // Finish job
-    controlledJob.finishJob();
-    ControlledMapReduceJob.waitTillNTotalTasksFinish(myJob, true, 15);
-    ControlledMapReduceJob.assertNumTasksRunning(myJob, true, 0);
-    jobRunner.join();
-  }
-
-  /**
-   * Test to verify queue capacities across multiple queues. In this test, jobs
-   * are submitted to different queues - all below the queue's capacity and
-   * verifies that all the jobs are running. This will test code paths related
-   * to job initialization, considering multiple queues for scheduling jobs etc.
-   * 
-   * <p>
-   * 
-   * One user per queue. Four jobs are submitted to the four queues such that
-   * they exactly fill up the queues. No queue should be beyond capacity. All
-   * jobs should be running.
-   * 
-   * @throws Exception
-   */
-  private void multipleQueuesWithinCapacities(String[] queues)
-      throws Exception {
-    String[] users = new String[] { "U1", "U2", "U3", "U4" };
-    ControlledMapReduceJobRunner[] jobRunners =
-        new ControlledMapReduceJobRunner[4];
-    ControlledMapReduceJob[] controlledJobs = new ControlledMapReduceJob[4];
-    JobInProgress[] jips = new JobInProgress[4];
-
-    // Initialize all the jobs
-    // Start all the jobs in parallel
-    JobConf conf = getJobConf();
-    int numTasks = 1;
-    for (int i = 0; i < 4; i++) {
-      conf.setQueueName(queues[i]);
-      conf.setUser(users[i]);
-      jobRunners[i] =
-          ControlledMapReduceJobRunner.getControlledMapReduceJobRunner(
-              getJobConf(), numTasks, numTasks);
-      jobRunners[i].start();
-      controlledJobs[i] = jobRunners[i].getJob();
-      JobID jobID = jobRunners[i].getJobID();
-      jips[i] = getJobTracker().getJob(jobID);
-      // Wait till all the jobs start running all of their tasks
-      ControlledMapReduceJob.waitTillNTasksStartRunning(jips[i], true,
-          numTasks);
-      ControlledMapReduceJob.waitTillNTasksStartRunning(jips[i], false,
-          numTasks);
-      numTasks += 1;
-    }
-
-    // Ensure steady state behavior
-    waitTillAllSlotsAreOccupied(true);
-    waitTillAllSlotsAreOccupied(false);
-    numTasks = 1;
-    for (int i = 0; i < 4; i++) {
-      ControlledMapReduceJob.assertNumTasksRunning(jips[i], true, numTasks);
-      ControlledMapReduceJob.assertNumTasksRunning(jips[i], false, numTasks);
-      numTasks += 1;
-    }
-
-    // Finish the jobs and join them
-    numTasks = 1;
-    for (int i = 0; i < 4; i++) {
-      controlledJobs[i].finishJob();
-      ControlledMapReduceJob
-          .waitTillNTotalTasksFinish(jips[i], true, numTasks);
-      ControlledMapReduceJob.assertNumTasksRunning(jips[i], true, 0);
-      ControlledMapReduceJob.waitTillNTotalTasksFinish(jips[i], false,
-          numTasks);
-      ControlledMapReduceJob.assertNumTasksRunning(jips[i], false, 0);
-      jobRunners[i].join();
-      numTasks += 1;
-    }
-  }
-}

+ 2 - 3
src/contrib/data_join/build.xml

@@ -28,9 +28,8 @@ to call at top-level: ant deploy-contrib compile-core-test
   <!-- Override jar target to specify main class -->
   <target name="jar" depends="compile">
     <jar
-      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
-      basedir="${build.classes}"      
-    >
+      jarfile="${build.dir}/hadoop-${name}-${version}.jar"
+      basedir="${build.classes}">      
   	<manifest>
 	    <attribute name="Main-Class" value="org.apache.hadoop.contrib.utils.join.DataJoinJob"/>
 	</manifest>

+ 8 - 0
src/contrib/data_join/ivy.xml

@@ -32,5 +32,13 @@
       name="log4j"
       rev="${log4j.version}"
       conf="common->master"/>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
     </dependencies>
 </ivy-module>

+ 2 - 0
src/contrib/data_join/ivy/libraries.properties

@@ -3,3 +3,5 @@
 
 #Please list the dependencies name with version if they are different from the ones 
 #listed in the global libraries.properties file (in alphabetical order)
+commons-configuration.version=1.6
+commons-math.version=2.1

+ 2 - 2
src/contrib/eclipse-plugin/build.xml

@@ -66,10 +66,10 @@
   <!-- Override jar target to specify manifest -->
   <target name="jar" depends="compile" unless="skip.contrib">
     <mkdir dir="${build.dir}/lib"/>
-    <copy file="${hadoop.root}/build/hadoop-${version}-core.jar" tofile="${build.dir}/lib/hadoop-core.jar" verbose="true"/>
+    <copy file="${hadoop.root}/build/hadoop-core-${version}.jar" tofile="${build.dir}/lib/hadoop-core.jar" verbose="true"/>
     <copy file="${hadoop.root}/build/ivy/lib/Hadoop/common/commons-cli-${commons-cli.version}.jar"  todir="${build.dir}/lib" verbose="true"/>
     <jar
-      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
+      jarfile="${build.dir}/hadoop-${name}-${version}.jar"
       manifest="${root}/META-INF/MANIFEST.MF">
       <fileset dir="${build.dir}" includes="classes/ lib/"/>
       <fileset dir="${root}" includes="resources/ plugin.xml"/>

+ 2 - 2
src/contrib/failmon/build.xml

@@ -21,7 +21,7 @@
 
   <import file="../build-contrib.xml"/>
 
-  <property name="jarfile" value="${build.dir}/${name}.jar"/>
+  <property name="jarfile" value="${build.dir}/hadoop-${name}-${version}.jar"/>
 
   <target name="jar" depends="compile" unless="skip.contrib">
     <!-- Make sure that the hadoop jar has been created -->
@@ -113,7 +113,7 @@
     <delete file="${name}.jar"/>
 
     <move file="${name}.tar.gz" todir="${build.dir}"/>
-    <echo message= "${hadoop.root}/build/contrib/failmon/${name}.jar"/>
+    <echo message= "${hadoop.root}/build/contrib/failmon/hadoop-${name}-${version}.jar"/>
     
   </target>
   

+ 16 - 4
src/contrib/fairscheduler/ivy.xml

@@ -30,13 +30,25 @@
       name="log4j"
       rev="${log4j.version}"
       conf="common->master"/>
-    <dependency org="org.mortbay.jetty"
-      name="servlet-api-2.5"
-      rev="${servlet-api-2.5.version}"
-      conf="common->default"/> 
    <dependency org="junit"
       name="junit"
       rev="${junit.version}"
       conf="common->default"/>
+    <dependency org="org.mortbay.jetty"
+      name="jetty-util"
+      rev="${jetty-util.version}"
+      conf="common->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jetty"
+      rev="${jetty.version}"
+      conf="common->default"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-api-2.1"
+      rev="${jsp-api-2.1.version}"
+      conf="common->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-2.1"
+      rev="${jsp-2.1.version}"
+      conf="common->master"/>
   </dependencies>
 </ivy-module>

+ 2 - 2
src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/CapBasedLoadManager.java

@@ -40,13 +40,13 @@ public class CapBasedLoadManager extends LoadManager {
   public boolean canAssignMap(TaskTrackerStatus tracker,
       int totalRunnableMaps, int totalMapSlots) {
     return tracker.countMapTasks() < getCap(totalRunnableMaps,
-        tracker.getMaxMapTasks(), totalMapSlots);
+        tracker.getMaxMapSlots(), totalMapSlots);
   }
 
   @Override
   public boolean canAssignReduce(TaskTrackerStatus tracker,
       int totalRunnableReduces, int totalReduceSlots) {
     return tracker.countReduceTasks() < getCap(totalRunnableReduces,
-        tracker.getMaxReduceTasks(), totalReduceSlots);
+        tracker.getMaxReduceSlots(), totalReduceSlots);
   }
 }

+ 11 - 16
src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.http.HttpServer;
 import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker;
 
 /**
  * A {@link TaskScheduler} that implements fair sharing.
@@ -86,15 +87,6 @@ public class FairScheduler extends TaskScheduler {
     double reduceFairShare = 0; // Fair share of reduce slots at last update
   }
   
-  /**
-   * A clock class - can be mocked out for testing.
-   */
-  static class Clock {
-    long getTime() {
-      return System.currentTimeMillis();
-    }
-  }
-  
   public FairScheduler() {
     this(new Clock(), true);
   }
@@ -218,7 +210,7 @@ public class FairScheduler extends TaskScheduler {
   }
   
   @Override
-  public synchronized List<Task> assignTasks(TaskTrackerStatus tracker)
+  public synchronized List<Task> assignTasks(TaskTracker tracker)
       throws IOException {
     if (!initialized) // Don't try to assign tasks if we haven't yet started up
       return null;
@@ -244,10 +236,11 @@ public class FairScheduler extends TaskScheduler {
     // Scan to see whether any job needs to run a map, then a reduce
     ArrayList<Task> tasks = new ArrayList<Task>();
     TaskType[] types = new TaskType[] {TaskType.MAP, TaskType.REDUCE};
+    TaskTrackerStatus trackerStatus = tracker.getStatus();
     for (TaskType taskType: types) {
       boolean canAssign = (taskType == TaskType.MAP) ? 
-          loadMgr.canAssignMap(tracker, runnableMaps, totalMapSlots) :
-          loadMgr.canAssignReduce(tracker, runnableReduces, totalReduceSlots);
+          loadMgr.canAssignMap(trackerStatus, runnableMaps, totalMapSlots) :
+          loadMgr.canAssignReduce(trackerStatus, runnableReduces, totalReduceSlots);
       if (canAssign) {
         // Figure out the jobs that need this type of task
         List<JobInProgress> candidates = new ArrayList<JobInProgress>();
@@ -263,8 +256,8 @@ public class FairScheduler extends TaskScheduler {
         Collections.sort(candidates, comparator);
         for (JobInProgress job: candidates) {
           Task task = (taskType == TaskType.MAP ? 
-              taskSelector.obtainNewMapTask(tracker, job) :
-              taskSelector.obtainNewReduceTask(tracker, job));
+              taskSelector.obtainNewMapTask(trackerStatus, job) :
+              taskSelector.obtainNewReduceTask(trackerStatus, job));
           if (task != null) {
             // Update the JobInfo for this job so we account for the launched
             // tasks during this update interval and don't try to launch more
@@ -412,7 +405,8 @@ public class FairScheduler extends TaskScheduler {
       int totalMaps = job.numMapTasks;
       int finishedMaps = 0;
       int runningMaps = 0;
-      for (TaskInProgress tip: job.getMapTasks()) {
+      for (TaskInProgress tip : 
+           job.getTasks(org.apache.hadoop.mapreduce.TaskType.MAP)) {
         if (tip.isComplete()) {
           finishedMaps += 1;
         } else if (tip.isRunning()) {
@@ -426,7 +420,8 @@ public class FairScheduler extends TaskScheduler {
       int totalReduces = job.numReduceTasks;
       int finishedReduces = 0;
       int runningReduces = 0;
-      for (TaskInProgress tip: job.getReduceTasks()) {
+      for (TaskInProgress tip : 
+           job.getTasks(org.apache.hadoop.mapreduce.TaskType.REDUCE)) {
         if (tip.isComplete()) {
           finishedReduces += 1;
         } else if (tip.isRunning()) {

+ 37 - 31
src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java

@@ -33,6 +33,9 @@ import junit.framework.TestCase;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.mapred.FairScheduler.JobInfo;
+import org.apache.hadoop.mapreduce.server.jobtracker.TaskTracker;
+import org.apache.hadoop.mapreduce.split.JobSplit;
+import org.apache.hadoop.mapred.UtilsForTests.FakeClock;
 
 public class TestFairScheduler extends TestCase {
   final static String TEST_DIR = new File(System.getProperty("test.build.data",
@@ -50,8 +53,9 @@ public class TestFairScheduler extends TestCase {
     private FakeTaskTrackerManager taskTrackerManager;
     
     public FakeJobInProgress(JobConf jobConf,
-        FakeTaskTrackerManager taskTrackerManager) throws IOException {
-      super(new JobID("test", ++jobCounter), jobConf);
+        FakeTaskTrackerManager taskTrackerManager, 
+        JobTracker jt) throws IOException {
+      super(new JobID("test", ++jobCounter), jobConf, jt);
       this.taskTrackerManager = taskTrackerManager;
       this.startTime = System.currentTimeMillis();
       this.status = new JobStatus();
@@ -67,7 +71,8 @@ public class TestFairScheduler extends TestCase {
     public Task obtainNewMapTask(final TaskTrackerStatus tts, int clusterSize,
         int ignored) throws IOException {
       TaskAttemptID attemptId = getTaskAttemptID(true);
-      Task task = new MapTask("", attemptId, 0, "", new BytesWritable()) {
+      Task task = new MapTask("", attemptId, 0, new JobSplit.TaskSplitIndex(),
+          1) {
         @Override
         public String toString() {
           return String.format("%s on %s", getTaskID(), tts.getTrackerName());
@@ -82,7 +87,7 @@ public class TestFairScheduler extends TestCase {
     public Task obtainNewReduceTask(final TaskTrackerStatus tts,
         int clusterSize, int ignored) throws IOException {
       TaskAttemptID attemptId = getTaskAttemptID(false);
-      Task task = new ReduceTask("", attemptId, 0, 10) {
+      Task task = new ReduceTask("", attemptId, 0, 10, 1) {
         @Override
         public String toString() {
           return String.format("%s on %s", getTaskID(), tts.getTrackerName());
@@ -108,18 +113,26 @@ public class TestFairScheduler extends TestCase {
     List<JobInProgressListener> listeners =
       new ArrayList<JobInProgressListener>();
     
-    private Map<String, TaskTrackerStatus> trackers =
-      new HashMap<String, TaskTrackerStatus>();
+    private Map<String, TaskTracker> trackers =
+      new HashMap<String, TaskTracker>();
     private Map<String, TaskStatus> taskStatuses = 
       new HashMap<String, TaskStatus>();
 
     public FakeTaskTrackerManager() {
-      trackers.put("tt1", new TaskTrackerStatus("tt1", "tt1.host", 1,
-          new ArrayList<TaskStatus>(), 0,
-          maxMapTasksPerTracker, maxReduceTasksPerTracker));
-      trackers.put("tt2", new TaskTrackerStatus("tt2", "tt2.host", 2,
-          new ArrayList<TaskStatus>(), 0,
-          maxMapTasksPerTracker, maxReduceTasksPerTracker));
+      TaskTracker tt1 = new TaskTracker("tt1");
+      tt1.setStatus(new TaskTrackerStatus("tt1", "tt1.host", 1,
+                                          new ArrayList<TaskStatus>(), 0,
+                                          maxMapTasksPerTracker, 
+                                          maxReduceTasksPerTracker));
+      trackers.put("tt1", tt1);
+      
+      TaskTracker tt2 = new TaskTracker("tt2");
+      tt2.setStatus(new TaskTrackerStatus("tt2", "tt2.host", 2,
+                                          new ArrayList<TaskStatus>(), 0,
+                                          maxMapTasksPerTracker, 
+                                          maxReduceTasksPerTracker));
+      trackers.put("tt2", tt2);
+
     }
     
     @Override
@@ -143,7 +156,11 @@ public class TestFairScheduler extends TestCase {
 
     @Override
     public Collection<TaskTrackerStatus> taskTrackers() {
-      return trackers.values();
+      List<TaskTrackerStatus> statuses = new ArrayList<TaskTrackerStatus>();
+      for (TaskTracker tt : trackers.values()) {
+        statuses.add(tt.getStatus());
+      }
+      return statuses;
     }
 
 
@@ -188,7 +205,7 @@ public class TestFairScheduler extends TestCase {
       }
     }
     
-    public TaskTrackerStatus getTaskTracker(String trackerID) {
+    public TaskTracker getTaskTracker(String trackerID) {
       return trackers.get(trackerID);
     }
     
@@ -206,7 +223,7 @@ public class TestFairScheduler extends TestCase {
       };
       taskStatuses.put(t.getTaskID().toString(), status);
       status.setRunState(TaskStatus.State.RUNNING);
-      trackers.get(taskTrackerName).getTaskReports().add(status);
+      trackers.get(taskTrackerName).getStatus().getTaskReports().add(status);
     }
     
     public void finishTask(String taskTrackerName, String tipId) {
@@ -220,19 +237,6 @@ public class TestFairScheduler extends TestCase {
     }
   }
   
-  protected class FakeClock extends FairScheduler.Clock {
-    private long time = 0;
-    
-    public void advance(long millis) {
-      time += millis;
-    }
-
-    @Override
-    long getTime() {
-      return time;
-    }
-  }
-  
   protected JobConf conf;
   protected FairScheduler scheduler;
   private FakeTaskTrackerManager taskTrackerManager;
@@ -279,7 +283,8 @@ public class TestFairScheduler extends TestCase {
     jobConf.setNumReduceTasks(reduces);
     if (pool != null)
       jobConf.set(POOL_PROPERTY, pool);
-    JobInProgress job = new FakeJobInProgress(jobConf, taskTrackerManager);
+    JobInProgress job = new FakeJobInProgress(jobConf, taskTrackerManager,
+        UtilsForTests.getJobTracker());
     job.getStatus().setRunState(state);
     taskTrackerManager.submitJob(job);
     job.startTime = clock.time;
@@ -499,7 +504,8 @@ public class TestFairScheduler extends TestCase {
     // Finish up the tasks and advance time again. Note that we must finish
     // the task since FakeJobInProgress does not properly maintain running
     // tasks, so the scheduler will always get an empty task list from
-    // the JobInProgress's getMapTasks/getReduceTasks and think they finished.
+    // the JobInProgress's getTasks(TaskType.MAP)/getTasks(TaskType.REDUCE) and 
+    // think they finished.
     taskTrackerManager.finishTask("tt1", "attempt_test_0001_m_000001_0");
     taskTrackerManager.finishTask("tt1", "attempt_test_0001_m_000002_0");
     taskTrackerManager.finishTask("tt1", "attempt_test_0001_r_000003_0");
@@ -1227,7 +1233,7 @@ public class TestFairScheduler extends TestCase {
     scheduler.update();
   }
 
-  protected TaskTrackerStatus tracker(String taskTrackerName) {
+  protected TaskTracker tracker(String taskTrackerName) {
     return taskTrackerManager.getTaskTracker(taskTrackerName);
   }
   

+ 8 - 1
src/contrib/fuse-dfs/ivy.xml

@@ -32,6 +32,13 @@
       name="log4j"
       rev="${log4j.version}"
       conf="common->master"/>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
     </dependencies>
-  
 </ivy-module>

+ 2 - 0
src/contrib/fuse-dfs/ivy/libraries.properties

@@ -3,3 +3,5 @@
 
 #Please list the dependencies name with version if they are different from the ones 
 #listed in the global libraries.properties file (in alphabetical order)
+commons-configuration.version=1.6
+commons-math.version=2.1

+ 22 - 0
src/contrib/gridmix/README

@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+This project implements the third version of Gridmix, a benchmark for live
+clusters. Given a description of jobs (a "trace") annotated with information
+about I/O, memory, etc. a synthetic mix of jobs will be generated and submitted
+to the cluster.
+
+Documentation of usage and configuration properties in forrest is available in
+src/docs/src/documentation/content/xdocs/gridmix.xml

+ 15 - 18
src/core/org/apache/hadoop/metrics/spi/package.html → src/contrib/gridmix/build.xml

@@ -1,6 +1,4 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-
+<?xml version="1.0" ?>
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
@@ -18,19 +16,18 @@
    limitations under the License.
 -->
 
-  <head>
-    <title>org.apache.hadoop.metrics.spi</title>
-  </head>
-  <body>
-The Service Provider Interface for the Metrics API.  This package provides
-an interface allowing a variety of metrics reporting implementations to be
-plugged in to the Metrics API.  Examples of such implementations can be found 
-in the packages <code>org.apache.hadoop.metrics.file</code> and
-<code>org.apache.hadoop.metrics.ganglia</code>.<p/>
+<project name="gridmix" default="jar">
+
+  <import file="../build-contrib.xml"/>
+
+   <!-- Run all unit tests. superdottest -->
+  <target name="test">
+   <antcall target="hadoopbuildcontrib.test" />
+  </target>
+
+  <!--Run all system tests.-->
+  <target name="test-system">
+    <antcall target="hadoopbuildcontrib.test-system" />
+   </target>
 
-Plugging in an implementation involves writing a concrete subclass of 
-<code>AbstractMetricsContext</code>.  The subclass should get its
- configuration information using the <code>getAttribute(<i>attributeName</i>)</code>
- method.
-  </body>
-</html>
+</project>

+ 101 - 0
src/contrib/gridmix/ivy.xml

@@ -0,0 +1,101 @@
+<?xml version="1.0" ?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <description>Rumen</description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact" />
+
+    <conf name="common" visibility="private" extends="runtime"
+      description="artifacts needed to compile/test the application"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="common->default"/>
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="common->master"/>
+    <dependency org="junit"
+      name="junit"
+      rev="${junit.version}"
+      conf="common->default"/>
+
+    <!-- necessary for Mini*Clusters -->
+    <dependency org="commons-httpclient"
+      name="commons-httpclient"
+      rev="${commons-httpclient.version}"
+      conf="common->master"/>
+    <dependency org="commons-codec"
+      name="commons-codec"
+      rev="${commons-codec.version}"
+      conf="common->default"/>
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="common->default"/>
+    <dependency org="org.mortbay.jetty"
+      name="jetty"
+      rev="${jetty.version}"
+      conf="common->default"/>
+    <dependency org="org.mortbay.jetty"
+      name="jetty-util"
+      rev="${jetty-util.version}"
+      conf="common->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-api-2.1"
+      rev="${jsp-api-2.1.version}"
+      conf="common->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-2.1"
+      rev="${jsp-2.1.version}"
+      conf="common->master"/>
+    <dependency org="commons-cli"
+      name="commons-cli"
+      rev="${commons-cli.version}"
+      conf="common->default"/>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-mapper-asl"
+      rev="${jackson.version}"
+      conf="common->default"/>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-core-asl"
+      rev="${jackson.version}"
+      conf="common->default"/>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
+  </dependencies>
+</ivy-module>

+ 24 - 0
src/contrib/gridmix/ivy/libraries.properties

@@ -0,0 +1,24 @@
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by streaming.
+#It drives ivy and the generation of a maven POM
+
+#Please list the dependencies name with version if they are different from the ones
+#listed in the global libraries.properties file (in alphabetical order)
+
+jackson.version=1.0.1
+commons-configuration.version=1.6
+commons-math.version=2.1

+ 91 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/AvgRecordFactory.java

@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Given byte and record targets, emit roughly equal-sized records satisfying
+ * the contract.
+ */
+class AvgRecordFactory extends RecordFactory {
+
+  /**
+   * Percentage of record for key data.
+   */
+  public static final String GRIDMIX_KEY_FRC = "gridmix.key.fraction";
+
+
+  private final long targetBytes;
+  private final long targetRecords;
+  private final long step;
+  private final int avgrec;
+  private final int keyLen;
+  private long accBytes = 0L;
+  private long accRecords = 0L;
+
+  /**
+   * @param targetBytes Expected byte count.
+   * @param targetRecords Expected record count.
+   * @param conf Used to resolve edge cases; see {@link #GRIDMIX_KEY_FRC}.
+   */
+  public AvgRecordFactory(long targetBytes, long targetRecords,
+      Configuration conf) {
+    this.targetBytes = targetBytes;
+    this.targetRecords = targetRecords <= 0 && this.targetBytes >= 0
+      ? Math.max(1,
+          this.targetBytes / conf.getInt("gridmix.missing.rec.size", 64 * 1024))
+      : targetRecords;
+    final long tmp = this.targetBytes / this.targetRecords;
+    step = this.targetBytes - this.targetRecords * tmp;
+    avgrec = (int) Math.min(Integer.MAX_VALUE, tmp + 1);
+    keyLen = Math.max(1,
+        (int)(tmp * Math.min(1.0f, conf.getFloat(GRIDMIX_KEY_FRC, 0.1f))));
+  }
+
+  @Override
+  public boolean next(GridmixKey key, GridmixRecord val) throws IOException {
+    if (accBytes >= targetBytes) {
+      return false;
+    }
+    final int reclen = accRecords++ >= step ? avgrec - 1 : avgrec;
+    final int len = (int) Math.min(targetBytes - accBytes, reclen);
+    // len != reclen?
+    if (key != null) {
+      key.setSize(keyLen);
+      val.setSize(len - key.getSize());
+    } else {
+      val.setSize(len);
+    }
+    accBytes += len;
+    return true;
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    return Math.min(1.0f, accBytes / ((float)targetBytes));
+  }
+
+  @Override
+  public void close() throws IOException {
+    // noop
+  }
+
+}
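
As a quick illustration of the sizing arithmetic in the constructor above, the following standalone sketch (hypothetical byte and record targets, not part of the patch) shows why the first `step` records carry one extra byte: the emitted lengths then sum exactly to the byte target.

    // Sketch of AvgRecordFactory's record-sizing arithmetic with made-up targets.
    public class AvgRecordSizingSketch {
      public static void main(String[] args) {
        final long targetBytes = 1000L;
        final long targetRecords = 7L;
        final long tmp = targetBytes / targetRecords;          // 142
        final long step = targetBytes - targetRecords * tmp;   // 6 records get one extra byte
        final int avgrec = (int) Math.min(Integer.MAX_VALUE, tmp + 1);
        long total = 0L;
        for (long rec = 0; rec < targetRecords; ++rec) {
          // mirrors: accRecords++ >= step ? avgrec - 1 : avgrec
          total += (rec >= step) ? avgrec - 1 : avgrec;
        }
        System.out.println(total == targetBytes);               // prints true
      }
    }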

+ 196 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/CombineFileSplit.java

@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+
+/**
+ * A sub-collection of input files. 
+ * 
+ * Unlike {@link FileSplit}, CombineFileSplit class does not represent 
+ * a split of a file, but a split of input files into smaller sets. 
+ * A split may contain blocks from different files, but all 
+ * the blocks in the same split are probably local to some rack. <br> 
+ * CombineFileSplit can be used to implement {@link RecordReader}'s, 
+ * with reading one record per file.
+ * 
+ * @see FileSplit
+ * @see CombineFileInputFormat 
+ */
+public class CombineFileSplit extends InputSplit implements Writable {
+
+  private Path[] paths;
+  private long[] startoffset;
+  private long[] lengths;
+  private String[] locations;
+  private long totLength;
+
+  /**
+   * default constructor
+   */
+  public CombineFileSplit() {}
+  public CombineFileSplit(Path[] files, long[] start, 
+                          long[] lengths, String[] locations) {
+    initSplit(files, start, lengths, locations);
+  }
+
+  public CombineFileSplit(Path[] files, long[] lengths) {
+    long[] startoffset = new long[files.length];
+    for (int i = 0; i < startoffset.length; i++) {
+      startoffset[i] = 0;
+    }
+    String[] locations = new String[files.length];
+    for (int i = 0; i < locations.length; i++) {
+      locations[i] = "";
+    }
+    initSplit(files, startoffset, lengths, locations);
+  }
+  
+  private void initSplit(Path[] files, long[] start, 
+                         long[] lengths, String[] locations) {
+    this.startoffset = start;
+    this.lengths = lengths;
+    this.paths = files;
+    this.totLength = 0;
+    this.locations = locations;
+    for(long length : lengths) {
+      totLength += length;
+    }
+  }
+
+  /**
+   * Copy constructor
+   */
+  public CombineFileSplit(CombineFileSplit old) throws IOException {
+    this(old.getPaths(), old.getStartOffsets(),
+         old.getLengths(), old.getLocations());
+  }
+
+  public long getLength() {
+    return totLength;
+  }
+
+  /** Returns an array containing the start offsets of the files in the split*/ 
+  public long[] getStartOffsets() {
+    return startoffset;
+  }
+  
+  /** Returns an array containing the lengths of the files in the split*/ 
+  public long[] getLengths() {
+    return lengths;
+  }
+
+  /** Returns the start offset of the i<sup>th</sup> Path */
+  public long getOffset(int i) {
+    return startoffset[i];
+  }
+  
+  /** Returns the length of the i<sup>th</sup> Path */
+  public long getLength(int i) {
+    return lengths[i];
+  }
+  
+  /** Returns the number of Paths in the split */
+  public int getNumPaths() {
+    return paths.length;
+  }
+
+  /** Returns the i<sup>th</sup> Path */
+  public Path getPath(int i) {
+    return paths[i];
+  }
+  
+  /** Returns all the Paths in the split */
+  public Path[] getPaths() {
+    return paths;
+  }
+
+  /** Returns the locations (hosts) on which this input split resides */
+  public String[] getLocations() throws IOException {
+    return locations;
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    totLength = in.readLong();
+    int arrLength = in.readInt();
+    lengths = new long[arrLength];
+    for(int i=0; i<arrLength;i++) {
+      lengths[i] = in.readLong();
+    }
+    int filesLength = in.readInt();
+    paths = new Path[filesLength];
+    for(int i=0; i<filesLength;i++) {
+      paths[i] = new Path(Text.readString(in));
+    }
+    arrLength = in.readInt();
+    startoffset = new long[arrLength];
+    for(int i=0; i<arrLength;i++) {
+      startoffset[i] = in.readLong();
+    }
+  }
+
+  public void write(DataOutput out) throws IOException {
+    out.writeLong(totLength);
+    out.writeInt(lengths.length);
+    for(long length : lengths) {
+      out.writeLong(length);
+    }
+    out.writeInt(paths.length);
+    for(Path p : paths) {
+      Text.writeString(out, p.toString());
+    }
+    out.writeInt(startoffset.length);
+    for(long length : startoffset) {
+      out.writeLong(length);
+    }
+  }
+  
+  @Override
+ public String toString() {
+    StringBuffer sb = new StringBuffer();
+    for (int i = 0; i < paths.length; i++) {
+      if (i == 0 ) {
+        sb.append("Paths:");
+      }
+      sb.append(paths[i].toUri().getPath() + ":" + startoffset[i] +
+                "+" + lengths[i]);
+      if (i < paths.length -1) {
+        sb.append(",");
+      }
+    }
+    if (locations != null) {
+      String locs = "";
+      StringBuffer locsb = new StringBuffer();
+      for (int i = 0; i < locations.length; i++) {
+        locsb.append(locations[i] + ":");
+      }
+      locs = locsb.toString();
+      sb.append(" Locations:" + locs + "; ");
+    }
+    return sb.toString();
+  }
+}
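
A minimal sketch of the Writable contract implemented above: build a split from two hypothetical paths, serialize it, and read it back through the no-arg constructor. Note that `readFields` does not restore the locations array, which is why `toString` guards against a null `locations`.

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.mapred.gridmix.CombineFileSplit;

    public class CombineFileSplitRoundTrip {
      public static void main(String[] args) throws Exception {
        Path[] files = { new Path("/pool/segment-0"), new Path("/pool/segment-1") }; // hypothetical
        long[] lengths = { 128L << 20, 64L << 20 };
        CombineFileSplit split = new CombineFileSplit(files, lengths);  // offsets default to 0

        // Serialize and deserialize via the Writable methods shown above.
        DataOutputBuffer out = new DataOutputBuffer();
        split.write(out);
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        CombineFileSplit copy = new CombineFileSplit();
        copy.readFields(in);

        System.out.println(copy.getLength());    // total bytes across both files
        System.out.println(copy.getNumPaths());  // 2
      }
    }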

+ 53 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/EchoUserResolver.java

@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Collections;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.ShellBasedUnixGroupsMapping;
+import org.apache.hadoop.security.Groups;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Echoes the UGI offered.
+ */
+public class EchoUserResolver implements UserResolver {
+  public static final Log LOG = LogFactory.getLog(Gridmix.class);
+
+  public EchoUserResolver() {
+    LOG.info(" Current user resolver is EchoUserResolver ");
+  }
+
+  public synchronized boolean setTargetUsers(URI userdesc, Configuration conf)
+      throws IOException {
+    return false;
+  }
+
+  public synchronized UserGroupInformation getTargetUgi(
+      UserGroupInformation ugi) {
+    return ugi;
+  }
+}
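
A small configuration sketch showing how this resolver could be selected; it simply keeps the submitting user's UGI for every synthetic job. The same effect can be had on the command line with -Dgridmix.user.resolve.class=org.apache.hadoop.mapred.gridmix.EchoUserResolver.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.gridmix.EchoUserResolver;
    import org.apache.hadoop.mapred.gridmix.Gridmix;

    public class SelectEchoResolverSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Gridmix reads this key (GRIDMIX_USR_RSV) when it instantiates the resolver.
        conf.set(Gridmix.GRIDMIX_USR_RSV, EchoUserResolver.class.getName());
      }
    }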

+ 301 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/FilePool.java

@@ -0,0 +1,301 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.gridmix.RandomAlgorithms.Selector;
+
+/**
+ * Class for caching a pool of input data to be used by synthetic jobs for
+ * simulating read traffic.
+ */
+class FilePool {
+
+  public static final Log LOG = LogFactory.getLog(FilePool.class);
+
+  /**
+   * The minimum file size added to the pool. Default 128MiB.
+   */
+  public static final String GRIDMIX_MIN_FILE = "gridmix.min.file.size";
+
+  /**
+   * The maximum total size of files added to the pool. Defaults to 100TiB.
+   */
+  public static final String GRIDMIX_MAX_TOTAL = "gridmix.max.total.scan";
+
+  private Node root;
+  private final Path path;
+  private final FileSystem fs;
+  private final Configuration conf;
+  private final ReadWriteLock updateLock;
+
+  /**
+   * Initialize a filepool under the path provided, but do not populate the
+   * cache.
+   */
+  public FilePool(Configuration conf, Path input) throws IOException {
+    root = null;
+    this.conf = conf;
+    this.path = input;
+    this.fs = path.getFileSystem(conf);
+    updateLock = new ReentrantReadWriteLock();
+  }
+
+  /**
+   * Gather a collection of files at least as large as minSize.
+   * @return The total size of files returned.
+   */
+  public long getInputFiles(long minSize, Collection<FileStatus> files)
+      throws IOException {
+    updateLock.readLock().lock();
+    try {
+      return root.selectFiles(minSize, files);
+    } finally {
+      updateLock.readLock().unlock();
+    }
+  }
+
+  /**
+   * (Re)generate cache of input FileStatus objects.
+   */
+  public void refresh() throws IOException {
+    updateLock.writeLock().lock();
+    try {
+      root = new InnerDesc(fs, fs.getFileStatus(path),
+        new MinFileFilter(conf.getLong(GRIDMIX_MIN_FILE, 128 * 1024 * 1024),
+                          conf.getLong(GRIDMIX_MAX_TOTAL, 100L * (1L << 40))));
+      if (0 == root.getSize()) {
+        throw new IOException("Found no satisfactory file in " + path);
+      }
+    } finally {
+      updateLock.writeLock().unlock();
+    }
+  }
+
+  /**
+   * Get a set of locations for the given file.
+   */
+  public BlockLocation[] locationsFor(FileStatus stat, long start, long len)
+      throws IOException {
+    // TODO cache
+    return fs.getFileBlockLocations(stat, start, len);
+  }
+
+  static abstract class Node {
+
+    protected final static Random rand = new Random();
+
+    /**
+     * Total size of files and directories under the current node.
+     */
+    abstract long getSize();
+
+    /**
+     * Return a set of files whose cumulative size is at least
+     * <tt>targetSize</tt>.
+     * TODO Clearly size is not the only criterion, e.g. refresh from
+     * generated data without including running task output, tolerance
+     * for permission issues, etc.
+     */
+    abstract long selectFiles(long targetSize, Collection<FileStatus> files)
+        throws IOException;
+  }
+
+  /**
+   * Files in current directory of this Node.
+   */
+  static class LeafDesc extends Node {
+    final long size;
+    final ArrayList<FileStatus> curdir;
+
+    LeafDesc(ArrayList<FileStatus> curdir, long size) {
+      this.size = size;
+      this.curdir = curdir;
+    }
+
+    @Override
+    public long getSize() {
+      return size;
+    }
+
+    @Override
+    public long selectFiles(long targetSize, Collection<FileStatus> files)
+        throws IOException {
+      if (targetSize >= getSize()) {
+        files.addAll(curdir);
+        return getSize();
+      }
+
+      Selector selector = new Selector(curdir.size(), (double) targetSize
+          / getSize(), rand);
+      
+      ArrayList<Integer> selected = new ArrayList<Integer>();
+      long ret = 0L;
+      do {
+        int index = selector.next();
+        selected.add(index);
+        ret += curdir.get(index).getLen();
+      } while (ret < targetSize);
+
+      for (Integer i : selected) {
+        files.add(curdir.get(i));
+      }
+
+      return ret;
+    }
+  }
+
+  /**
+   * A subdirectory of the current Node.
+   */
+  static class InnerDesc extends Node {
+    final long size;
+    final double[] dist;
+    final Node[] subdir;
+
+    private static final Comparator<Node> nodeComparator =
+      new Comparator<Node>() {
+          public int compare(Node n1, Node n2) {
+            return n1.getSize() < n2.getSize() ? -1
+                 : n1.getSize() > n2.getSize() ? 1 : 0;
+          }
+    };
+
+    InnerDesc(final FileSystem fs, FileStatus thisDir, MinFileFilter filter)
+        throws IOException {
+      long fileSum = 0L;
+      final ArrayList<FileStatus> curFiles = new ArrayList<FileStatus>();
+      final ArrayList<FileStatus> curDirs = new ArrayList<FileStatus>();
+      for (FileStatus stat : fs.listStatus(thisDir.getPath())) {
+        if (stat.isDir()) {
+          curDirs.add(stat);
+        } else if (filter.accept(stat)) {
+          curFiles.add(stat);
+          fileSum += stat.getLen();
+        }
+      }
+      ArrayList<Node> subdirList = new ArrayList<Node>();
+      if (!curFiles.isEmpty()) {
+        subdirList.add(new LeafDesc(curFiles, fileSum));
+      }
+      for (Iterator<FileStatus> i = curDirs.iterator();
+          !filter.done() && i.hasNext();) {
+        // add subdirectories
+        final Node d = new InnerDesc(fs, i.next(), filter);
+        final long dSize = d.getSize();
+        if (dSize > 0) {
+          fileSum += dSize;
+          subdirList.add(d);
+        }
+      }
+      size = fileSum;
+      LOG.debug(size + " bytes in " + thisDir.getPath());
+      subdir = subdirList.toArray(new Node[subdirList.size()]);
+      Arrays.sort(subdir, nodeComparator);
+      dist = new double[subdir.length];
+      for (int i = dist.length - 1; i > 0; --i) {
+        fileSum -= subdir[i].getSize();
+        dist[i] = fileSum / (1.0 * size);
+      }
+    }
+
+    @Override
+    public long getSize() {
+      return size;
+    }
+
+    @Override
+    public long selectFiles(long targetSize, Collection<FileStatus> files)
+        throws IOException {
+      long ret = 0L;
+      if (targetSize >= getSize()) {
+        // request larger than all subdirs; add everything
+        for (Node n : subdir) {
+          long added = n.selectFiles(targetSize, files);
+          ret += added;
+          targetSize -= added;
+        }
+        return ret;
+      }
+
+      // can satisfy request in proper subset of contents
+      // select random set, weighted by size
+      final HashSet<Node> sub = new HashSet<Node>();
+      do {
+        assert sub.size() < subdir.length;
+        final double r = rand.nextDouble();
+        int pos = Math.abs(Arrays.binarySearch(dist, r) + 1) - 1;
+        while (sub.contains(subdir[pos])) {
+          pos = (pos + 1) % subdir.length;
+        }
+        long added = subdir[pos].selectFiles(targetSize, files);
+        ret += added;
+        targetSize -= added;
+        sub.add(subdir[pos]);
+      } while (targetSize > 0);
+      return ret;
+    }
+  }
+
+  /**
+   * Filter enforcing the minFile/maxTotal parameters of the scan.
+   */
+  private static class MinFileFilter {
+
+    private long totalScan;
+    private final long minFileSize;
+
+    public MinFileFilter(long minFileSize, long totalScan) {
+      this.minFileSize = minFileSize;
+      this.totalScan = totalScan;
+    }
+    public boolean done() {
+      return totalScan <= 0;
+    }
+    public boolean accept(FileStatus stat) {
+      final boolean done = done();
+      if (!done && stat.getLen() >= minFileSize) {
+        totalScan -= stat.getLen();
+        return true;
+      }
+      return false;
+    }
+  }
+
+}
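
A minimal usage sketch, assuming an existing input directory on a live filesystem (the path below is a placeholder). Because FilePool is package-private, the sketch lives in the same package: scan the directory once with refresh(), then draw at least 1 GiB worth of files from the cache.

    package org.apache.hadoop.mapred.gridmix;

    import java.util.ArrayList;
    import java.util.Collection;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;

    public class FilePoolSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setLong(FilePool.GRIDMIX_MIN_FILE, 64 * 1024 * 1024);   // index only files >= 64 MiB

        FilePool pool = new FilePool(conf, new Path("/gridmix/input")); // hypothetical path
        pool.refresh();                                   // build the cache; throws if nothing qualifies

        Collection<FileStatus> files = new ArrayList<FileStatus>();
        long got = pool.getInputFiles(1L << 30, files);   // request at least 1 GiB
        System.out.println("Selected " + files.size() + " files totalling " + got + " bytes");
      }
    }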

+ 104 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/FileQueue.java

@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+
+/**
+ * Given a {@link org.apache.hadoop.mapreduce.lib.input.CombineFileSplit},
+ * circularly read through each input source.
+ */
+class FileQueue extends InputStream {
+
+  private int idx = -1;
+  private long curlen = -1L;
+  private FSDataInputStream input;
+  private final byte[] z = new byte[1];
+  private final Path[] paths;
+  private final long[] lengths;
+  private final long[] startoffset;
+  private final Configuration conf;
+
+  /**
+   * @param split Description of input sources.
+   * @param conf Used to resolve FileSystem instances.
+   */
+  public FileQueue(CombineFileSplit split, Configuration conf)
+      throws IOException {
+    this.conf = conf;
+    paths = split.getPaths();
+    startoffset = split.getStartOffsets();
+    lengths = split.getLengths();
+    nextSource();
+  }
+
+  protected void nextSource() throws IOException {
+    if (0 == paths.length) {
+      return;
+    }
+    if (input != null) {
+      input.close();
+    }
+    idx = (idx + 1) % paths.length;
+    curlen = lengths[idx];
+    final Path file = paths[idx];
+    final FileSystem fs = file.getFileSystem(conf);
+    input = fs.open(file);
+    input.seek(startoffset[idx]);
+  }
+
+  @Override
+  public int read() throws IOException {
+    final int tmp = read(z);
+    return tmp == -1 ? -1 : (0xFF & z[0]);
+  }
+
+  @Override
+  public int read(byte[] b) throws IOException {
+    return read(b, 0, b.length);
+  }
+
+  @Override
+  public int read(byte[] b, int off, int len) throws IOException {
+    int kvread = 0;
+    while (kvread < len) {
+      if (curlen <= 0) {
+        nextSource();
+        continue;
+      }
+      final int srcRead = (int) Math.min(len - kvread, curlen);
+      IOUtils.readFully(input, b, kvread, srcRead);
+      curlen -= srcRead;
+      kvread += srcRead;
+    }
+    return kvread;
+  }
+
+  @Override
+  public void close() throws IOException {
+    input.close();
+  }
+
+}
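
A minimal sketch (same package, since FileQueue is package-private) of the circular read behaviour: wrap two hypothetical files in a CombineFileSplit and stream more bytes than they contain, letting FileQueue cycle through the sources. The paths are placeholders and must exist on the configured filesystem.

    package org.apache.hadoop.mapred.gridmix;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class FileQueueSketch {
      public static void main(String[] args) throws Exception {
        Path[] files = { new Path("/pool/segment-0"), new Path("/pool/segment-1") }; // hypothetical
        long[] lengths = { 1L << 20, 1L << 20 };
        CombineFileSplit split = new CombineFileSplit(files, lengths);

        FileQueue in = new FileQueue(split, new Configuration());
        byte[] buf = new byte[64 * 1024];
        long remaining = 4L << 20;      // read 4 MiB; FileQueue wraps around the two sources
        while (remaining > 0) {
          int got = in.read(buf, 0, (int) Math.min(buf.length, remaining));
          remaining -= got;
        }
        in.close();
      }
    }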

+ 324 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GenerateData.java

@@ -0,0 +1,324 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.OutputStream;
+import java.security.PrivilegedExceptionAction;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
+
+// TODO can replace with form of GridmixJob
+class GenerateData extends GridmixJob {
+
+
+  /**
+   * Total bytes to write.
+   */
+  public static final String GRIDMIX_GEN_BYTES = "gridmix.gen.bytes";
+
+  /**
+   * Maximum size per file written.
+   */
+  public static final String GRIDMIX_GEN_CHUNK = "gridmix.gen.bytes.per.file";
+
+  /**
+   * Size of writes to output file.
+   */
+  public static final String GRIDMIX_VAL_BYTES = "gendata.val.bytes";
+
+  /**
+   * Status reporting interval, in megabytes.
+   */
+  public static final String GRIDMIX_GEN_INTERVAL = "gendata.interval.mb";
+
+  /**
+   * Blocksize of generated data.
+   */
+  public static final String GRIDMIX_GEN_BLOCKSIZE = "gridmix.gen.blocksize";
+
+  /**
+   * Replication of generated data.
+   */
+  public static final String GRIDMIX_GEN_REPLICATION = "gridmix.gen.replicas";
+
+  public GenerateData(Configuration conf, Path outdir, long genbytes)
+      throws IOException {
+    super(conf, 0L, "GRIDMIX_GENDATA");
+    job.getConfiguration().setLong(GRIDMIX_GEN_BYTES, genbytes);
+    FileOutputFormat.setOutputPath(job, outdir);
+  }
+
+  @Override
+  public Job call() throws IOException, InterruptedException,
+                           ClassNotFoundException {
+    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
+    ugi.doAs( new PrivilegedExceptionAction <Job>() {
+       public Job run() throws IOException, ClassNotFoundException,
+                               InterruptedException {
+        job.setMapperClass(GenDataMapper.class);
+        job.setNumReduceTasks(0);
+        job.setMapOutputKeyClass(NullWritable.class);
+        job.setMapOutputValueClass(BytesWritable.class);
+        job.setInputFormatClass(GenDataFormat.class);
+        job.setOutputFormatClass(RawBytesOutputFormat.class);
+        job.setJarByClass(GenerateData.class);
+        try {
+          FileInputFormat.addInputPath(job, new Path("ignored"));
+        } catch (IOException e) {
+          LOG.error("Error  while adding input path ", e);
+        }
+        job.submit();
+        return job;
+      }
+    });
+    return job;
+  }
+
+  public static class GenDataMapper
+      extends Mapper<NullWritable,LongWritable,NullWritable,BytesWritable> {
+
+    private BytesWritable val;
+    private final Random r = new Random();
+
+    @Override
+    protected void setup(Context context)
+        throws IOException, InterruptedException {
+      val = new BytesWritable(new byte[
+          context.getConfiguration().getInt(GRIDMIX_VAL_BYTES, 1024 * 1024)]);
+    }
+
+    @Override
+    public void map(NullWritable key, LongWritable value, Context context)
+        throws IOException, InterruptedException {
+      for (long bytes = value.get(); bytes > 0; bytes -= val.getLength()) {
+        r.nextBytes(val.getBytes());
+        val.setSize((int)Math.min(val.getLength(), bytes));
+        context.write(key, val);
+      }
+    }
+
+  }
+
+  static class GenDataFormat extends InputFormat<NullWritable,LongWritable> {
+
+    @Override
+    public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
+      final JobClient client =
+        new JobClient(new JobConf(jobCtxt.getConfiguration()));
+      ClusterStatus stat = client.getClusterStatus(true);
+      final long toGen =
+        jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
+      if (toGen < 0) {
+        throw new IOException("Invalid/missing generation bytes: " + toGen);
+      }
+      final int nTrackers = stat.getTaskTrackers();
+      final long bytesPerTracker = toGen / nTrackers;
+      final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
+      final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
+      final Matcher m = trackerPattern.matcher("");
+      for (String tracker : stat.getActiveTrackerNames()) {
+        m.reset(tracker);
+        if (!m.find()) {
+          System.err.println("Skipping node: " + tracker);
+          continue;
+        }
+        final String name = m.group(1);
+        splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
+      }
+      return splits;
+    }
+
+    @Override
+    public RecordReader<NullWritable,LongWritable> createRecordReader(
+        InputSplit split, final TaskAttemptContext taskContext)
+        throws IOException {
+      return new RecordReader<NullWritable,LongWritable>() {
+        long written = 0L;
+        long write = 0L;
+        long RINTERVAL;
+        long toWrite;
+        final NullWritable key = NullWritable.get();
+        final LongWritable val = new LongWritable();
+
+        @Override
+        public void initialize(InputSplit split, TaskAttemptContext ctxt)
+            throws IOException, InterruptedException {
+          toWrite = split.getLength();
+          RINTERVAL = ctxt.getConfiguration().getInt(
+              GRIDMIX_GEN_INTERVAL, 10) << 20;
+        }
+        @Override
+        public boolean nextKeyValue() throws IOException {
+          written += write;
+          write = Math.min(toWrite - written, RINTERVAL);
+          val.set(write);
+          return written < toWrite;
+        }
+        @Override
+        public float getProgress() throws IOException {
+          return written / ((float)toWrite);
+        }
+        @Override
+        public NullWritable getCurrentKey() { return key; }
+        @Override
+        public LongWritable getCurrentValue() { return val; }
+        @Override
+        public void close() throws IOException {
+          taskContext.setStatus("Wrote " + toWrite);
+        }
+      };
+    }
+  }
+
+  static class GenSplit extends InputSplit implements Writable {
+    private long bytes;
+    private int nLoc;
+    private String[] locations;
+
+    public GenSplit() { }
+    public GenSplit(long bytes, String[] locations) {
+      this(bytes, locations.length, locations);
+    }
+    public GenSplit(long bytes, int nLoc, String[] locations) {
+      this.bytes = bytes;
+      this.nLoc = nLoc;
+      this.locations = Arrays.copyOf(locations, nLoc);
+    }
+    @Override
+    public long getLength() {
+      return bytes;
+    }
+    @Override
+    public String[] getLocations() {
+      return locations;
+    }
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      bytes = in.readLong();
+      nLoc = in.readInt();
+      if (null == locations || locations.length < nLoc) {
+        locations = new String[nLoc];
+      }
+      for (int i = 0; i < nLoc; ++i) {
+        locations[i] = Text.readString(in);
+      }
+    }
+    @Override
+    public void write(DataOutput out) throws IOException {
+      out.writeLong(bytes);
+      out.writeInt(nLoc);
+      for (int i = 0; i < nLoc; ++i) {
+        Text.writeString(out, locations[i]);
+      }
+    }
+  }
+
+  static class RawBytesOutputFormat
+      extends FileOutputFormat<NullWritable,BytesWritable> {
+
+    @Override
+    public RecordWriter<NullWritable,BytesWritable> getRecordWriter(
+        TaskAttemptContext job) throws IOException {
+
+      return new ChunkWriter(getDefaultWorkFile(job, ""),
+          job.getConfiguration());
+    }
+
+    static class ChunkWriter extends RecordWriter<NullWritable,BytesWritable> {
+      private final Path outDir;
+      private final FileSystem fs;
+      private final int blocksize;
+      private final short replicas;
+      private final long maxFileBytes;
+      private final FsPermission genPerms = new FsPermission((short) 0777);
+
+      private long accFileBytes = 0L;
+      private long fileIdx = -1L;
+      private OutputStream fileOut = null;
+
+      public ChunkWriter(Path outDir, Configuration conf) throws IOException {
+        this.outDir = outDir;
+        fs = outDir.getFileSystem(conf);
+        blocksize = conf.getInt(GRIDMIX_GEN_BLOCKSIZE, 1 << 28);
+        replicas = (short) conf.getInt(GRIDMIX_GEN_REPLICATION, 3);
+        maxFileBytes = conf.getLong(GRIDMIX_GEN_CHUNK, 1L << 30);
+        nextDestination();
+      }
+      private void nextDestination() throws IOException {
+        if (fileOut != null) {
+          fileOut.close();
+        }
+        fileOut = fs.create(new Path(outDir, "segment-" + (++fileIdx)),
+            genPerms, false, 64 * 1024, replicas, blocksize, null);
+        accFileBytes = 0L;
+      }
+      @Override
+      public void write(NullWritable key, BytesWritable value)
+          throws IOException {
+        int written = 0;
+        final int total = value.getLength();
+        while (written < total) {
+          if (accFileBytes >= maxFileBytes) {
+            nextDestination();
+          }
+          final int write = (int)
+            Math.min(total - written, maxFileBytes - accFileBytes);
+          fileOut.write(value.getBytes(), written, write);
+          written += write;
+          accFileBytes += write;
+        }
+      }
+      @Override
+      public void close(TaskAttemptContext ctxt) throws IOException {
+        fileOut.close();
+      }
+    }
+  }
+
+}
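
The ChunkWriter above caps each output file at GRIDMIX_GEN_CHUNK bytes and rolls over to a new "segment-N" file once the cap is reached. A tiny standalone sketch of just that rollover arithmetic, with made-up sizes:

    // Illustrates how a single 3.5 MiB value is spread across 1 MiB "segment" files.
    public class ChunkRolloverSketch {
      public static void main(String[] args) {
        final long maxFileBytes = 1L << 20;              // per-segment cap (GRIDMIX_GEN_CHUNK analogue)
        long accFileBytes = 0L;
        long fileIdx = 0L;
        final int total = 3 * 1024 * 1024 + 512 * 1024;  // one 3.5 MiB value buffer
        int written = 0;
        while (written < total) {
          if (accFileBytes >= maxFileBytes) {            // roll to segment-(++fileIdx)
            ++fileIdx;
            accFileBytes = 0L;
          }
          final int write = (int) Math.min(total - written, maxFileBytes - accFileBytes);
          written += write;
          accFileBytes += write;
        }
        System.out.println("last segment index: " + fileIdx);  // 3 (segments 0..3)
      }
    }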

+ 495 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java

@@ -0,0 +1,495 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.net.URI;
+import java.security.PrivilegedExceptionAction;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hadoop.tools.rumen.ZombieJobProducer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Driver class for the Gridmix3 benchmark. Gridmix accepts a timestamped
+ * stream (trace) of job/task descriptions. For each job in the trace, the
+ * client will submit a corresponding, synthetic job to the target cluster at
+ * the rate in the original trace. The intent is to provide a benchmark that
+ * can be configured and extended to closely match the measured resource
+ * profile of actual, production loads.
+ */
+public class Gridmix extends Configured implements Tool {
+
+  public static final Log LOG = LogFactory.getLog(Gridmix.class);
+
+  /**
+   * Output (scratch) directory for submitted jobs. Relative paths are
+   * resolved against the path provided as input and absolute paths remain
+   * independent of it. The default is &quot;gridmix&quot;.
+   */
+  public static final String GRIDMIX_OUT_DIR = "gridmix.output.directory";
+
+  /**
+   * Number of submitting threads at the client and upper bound for
+   * in-memory split data. Submitting threads precompute InputSplits for
+   * submitted jobs. This limits the number of splits held in memory waiting
+   * for submission and also permits parallel computation of split data.
+   */
+  public static final String GRIDMIX_SUB_THR = "gridmix.client.submit.threads";
+
+  /**
+   * The depth of the queue of job descriptions. Before splits are computed,
+   * a queue of pending descriptions is stored in memory. This parameter
+   * limits the depth of that queue.
+   */
+  public static final String GRIDMIX_QUE_DEP =
+    "gridmix.client.pending.queue.depth";
+
+  /**
+   * Multiplier to accelerate or decelerate job submission. As a crude means of
+   * sizing a job trace to a cluster, the time separating two jobs is
+   * multiplied by this factor.
+   */
+  public static final String GRIDMIX_SUB_MUL = "gridmix.submit.multiplier";
+
+  /**
+   * Class used to resolve users in the trace to the list of target users
+   * on the cluster.
+   */
+  public static final String GRIDMIX_USR_RSV = "gridmix.user.resolve.class";
+
+  // Submit data structures
+  private JobFactory factory;
+  private JobSubmitter submitter;
+  private JobMonitor monitor;
+  private Statistics statistics;
+
+  // Shutdown hook
+  private final Shutdown sdh = new Shutdown();
+
+  /**
+   * Write random bytes at the path provided.
+   * @see org.apache.hadoop.mapred.gridmix.GenerateData
+   */
+  protected void writeInputData(long genbytes, Path ioPath)
+      throws IOException, InterruptedException {
+    final Configuration conf = getConf();
+    final GridmixJob genData = new GenerateData(conf, ioPath, genbytes);
+    submitter.add(genData);
+    LOG.info("Generating " + StringUtils.humanReadableInt(genbytes) +
+        " of test data...");
+    // TODO add listeners, use for job dependencies
+    TimeUnit.SECONDS.sleep(10);
+    try {
+      genData.getJob().waitForCompletion(false);
+    } catch (ClassNotFoundException e) {
+      throw new IOException("Internal error", e);
+    }
+    if (!genData.getJob().isSuccessful()) {
+      throw new IOException("Data generation failed!");
+    }
+
+    FsShell shell = new FsShell(conf);
+    try {
+      LOG.info("Changing the permissions for inputPath " + ioPath.toString());
+      shell.run(new String[] {"-chmod","-R","777", ioPath.toString()});
+    } catch (Exception e) {
+      LOG.error("Couldnt change the file permissions " , e);
+      throw new IOException(e);
+    }
+    LOG.info("Done.");
+  }
+
+  protected InputStream createInputStream(String in) throws IOException {
+    if ("-".equals(in)) {
+      return System.in;
+    }
+    final Path pin = new Path(in);
+    return pin.getFileSystem(getConf()).open(pin);
+  }
+
+  /**
+   * Create each component in the pipeline and start it.
+   * @param conf Configuration data, no keys specific to this context
+   * @param traceIn Either a Path to the trace data or &quot;-&quot; for
+   *                stdin
+   * @param ioPath Path from which input data is read
+   * @param scratchDir Path into which job output is written
+   * @param startFlag Semaphore for starting job trace pipeline
+   */
+  private void startThreads(Configuration conf, String traceIn, Path ioPath,
+      Path scratchDir, CountDownLatch startFlag, UserResolver userResolver)
+      throws IOException {
+    try {
+      GridmixJobSubmissionPolicy policy = GridmixJobSubmissionPolicy.getPolicy(
+        conf, GridmixJobSubmissionPolicy.STRESS);
+      LOG.info(" Submission policy is " + policy.name());
+      statistics = new Statistics(conf, policy.getPollingInterval(), startFlag);
+      monitor = createJobMonitor(statistics);
+      int noOfSubmitterThreads = (policy == GridmixJobSubmissionPolicy.SERIAL) ? 1
+          : Runtime.getRuntime().availableProcessors() + 1;
+
+      submitter = createJobSubmitter(
+        monitor, conf.getInt(
+          GRIDMIX_SUB_THR, noOfSubmitterThreads), conf.getInt(
+          GRIDMIX_QUE_DEP, 5), new FilePool(
+          conf, ioPath), userResolver,statistics);
+      
+      factory = createJobFactory(
+        submitter, traceIn, scratchDir, conf, startFlag, userResolver);
+      if (policy==GridmixJobSubmissionPolicy.SERIAL) {
+        statistics.addJobStatsListeners(factory);
+      } else {
+        statistics.addClusterStatsObservers(factory);
+      }
+      
+      monitor.start();
+      submitter.start();
+    }catch(Exception e) {
+      LOG.error(" Exception at start " ,e);
+      throw new IOException(e);
+    }
+  }
+
+  protected JobMonitor createJobMonitor(Statistics stats) throws IOException {
+    return new JobMonitor(stats);
+  }
+
+  protected JobSubmitter createJobSubmitter(
+    JobMonitor monitor, int threads, int queueDepth, FilePool pool,
+    UserResolver resolver, Statistics statistics) throws IOException {
+    return new JobSubmitter(monitor, threads, queueDepth, pool, statistics);
+  }
+
+  protected JobFactory createJobFactory(
+    JobSubmitter submitter, String traceIn, Path scratchDir, Configuration conf,
+    CountDownLatch startFlag, UserResolver resolver)
+    throws IOException {
+    return GridmixJobSubmissionPolicy.getPolicy(
+      conf, GridmixJobSubmissionPolicy.STRESS).createJobFactory(
+      submitter, new ZombieJobProducer(
+        createInputStream(
+          traceIn), null), scratchDir, conf, startFlag, resolver);
+  }
+
+  public int run(final String[] argv) throws IOException, InterruptedException {
+    int val = -1;
+    final Configuration conf = getConf();
+    UserGroupInformation.setConfiguration(conf);
+    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
+
+    val = ugi.doAs(new PrivilegedExceptionAction<Integer>() {
+      public Integer run() throws Exception {
+        return runJob(conf,argv);
+      }
+    });
+    return val; 
+  }
+
+  private static UserResolver userResolver;
+
+  public UserResolver getCurrentUserResolver() {
+    return userResolver;
+  }
+
+  private int runJob(Configuration conf, String[] argv)
+    throws IOException, InterruptedException {
+    if (argv.length < 2) {
+      printUsage(System.err);
+      return 1;
+    }
+    long genbytes = -1L;
+    String traceIn = null;
+    Path ioPath = null;
+    URI userRsrc = null;
+    userResolver = ReflectionUtils.newInstance(
+        conf.getClass(GRIDMIX_USR_RSV, SubmitterUserResolver.class,
+          UserResolver.class), conf);
+    try {
+      for (int i = 0; i < argv.length - 2; ++i) {
+        if ("-generate".equals(argv[i])) {
+          genbytes = StringUtils.TraditionalBinaryPrefix.string2long(argv[++i]);
+        } else if ("-users".equals(argv[i])) {
+          userRsrc = new URI(argv[++i]);
+        } else {
+          printUsage(System.err);
+          return 1;
+        }
+      }
+      if (!userResolver.setTargetUsers(userRsrc, conf)) {
+        LOG.warn("Resource " + userRsrc + " ignored");
+      }
+      ioPath = new Path(argv[argv.length - 2]);
+      traceIn = argv[argv.length - 1];
+    } catch (Exception e) {
+      e.printStackTrace();
+      printUsage(System.err);
+      return 1;
+    }
+    return start(conf, traceIn, ioPath, genbytes, userResolver);
+  }
+
+  int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
+      UserResolver userResolver) throws IOException, InterruptedException {
+    InputStream trace = null;
+    try {
+      Path scratchDir = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix"));
+      final FileSystem scratchFs = scratchDir.getFileSystem(conf);
+      scratchFs.mkdirs(scratchDir, new FsPermission((short) 0777));
+      scratchFs.setPermission(scratchDir, new FsPermission((short) 0777));
+      // add shutdown hook for SIGINT, etc.
+      Runtime.getRuntime().addShutdownHook(sdh);
+      CountDownLatch startFlag = new CountDownLatch(1);
+      try {
+        // Create, start job submission threads
+        startThreads(conf, traceIn, ioPath, scratchDir, startFlag,
+            userResolver);
+        // Write input data if specified
+        if (genbytes > 0) {
+          writeInputData(genbytes, ioPath);
+        }
+        // scan input dir contents
+        submitter.refreshFilePool();
+        factory.start();
+        statistics.start();
+      } catch (Throwable e) {
+        LOG.error("Startup failed", e);
+        if (factory != null) factory.abort(); // abort pipeline
+      } finally {
+        // signal for factory to start; sets start time
+        startFlag.countDown();
+      }
+      if (factory != null) {
+        // wait for input exhaustion
+        factory.join(Long.MAX_VALUE);
+        final Throwable badTraceException = factory.error();
+        if (null != badTraceException) {
+          LOG.error("Error in trace", badTraceException);
+          throw new IOException("Error in trace", badTraceException);
+        }
+        // wait for pending tasks to be submitted
+        submitter.shutdown();
+        submitter.join(Long.MAX_VALUE);
+        // wait for running tasks to complete
+        monitor.shutdown();
+        monitor.join(Long.MAX_VALUE);
+
+        statistics.shutdown();
+        statistics.join(Long.MAX_VALUE);
+
+      }
+    } finally {
+      IOUtils.cleanup(LOG, trace);
+    }
+    return 0;
+  }
+
+  /**
+   * Handles orderly shutdown by requesting that each component in the
+   * pipeline abort its progress, waiting for each to exit and killing
+   * any jobs still running on the cluster.
+   */
+  class Shutdown extends Thread {
+
+    static final long FAC_SLEEP = 1000;
+    static final long SUB_SLEEP = 4000;
+    static final long MON_SLEEP = 15000;
+
+    private void killComponent(Component<?> component, long maxwait) {
+      if (component == null) {
+        return;
+      }
+      component.abort();
+      try {
+        component.join(maxwait);
+      } catch (InterruptedException e) {
+        LOG.warn("Interrupted waiting for " + component);
+      }
+
+    }
+
+    @Override
+    public void run() {
+      LOG.info("Exiting...");
+      try {
+        killComponent(factory, FAC_SLEEP);   // read no more tasks
+        killComponent(submitter, SUB_SLEEP); // submit no more tasks
+        killComponent(monitor, MON_SLEEP);   // process remaining jobs here
+        killComponent(statistics,MON_SLEEP);
+      } finally {
+        if (monitor == null) {
+          return;
+        }
+        List<Job> remainingJobs = monitor.getRemainingJobs();
+        if (remainingJobs.isEmpty()) {
+          return;
+        }
+        LOG.info("Killing running jobs...");
+        for (Job job : remainingJobs) {
+          try {
+            if (!job.isComplete()) {
+              job.killJob();
+              LOG.info("Killed " + job.getJobName() + " (" +
+                  job.getJobID() + ")");
+            } else {
+              if (job.isSuccessful()) {
+                monitor.onSuccess(job);
+              } else {
+                monitor.onFailure(job);
+              }
+            }
+          } catch (IOException e) {
+            LOG.warn("Failure killing " + job.getJobName(), e);
+          } catch (Exception e) {
+            LOG.error("Unexcpected exception", e);
+          }
+        }
+        LOG.info("Done.");
+      }
+    }
+
+  }
+
+  public static void main(String[] argv) throws Exception {
+    int res = -1;
+    try {
+      res = ToolRunner.run(new Configuration(), new Gridmix(), argv);
+    } finally {
+      System.exit(res);
+    }
+  }
+
+  private <T> String getEnumValues(Enum<? extends T>[] e) {
+    StringBuilder sb = new StringBuilder();
+    String sep = "";
+    for (Enum<? extends T> v : e) {
+      sb.append(sep);
+      sb.append(v.name());
+      sep = "|";
+    }
+    return sb.toString();
+  }
+  
+  private String getJobTypes() {
+    return getEnumValues(JobCreator.values());
+  }
+  
+  private String getSubmissionPolicies() {
+    return getEnumValues(GridmixJobSubmissionPolicy.values());
+  }
+  
+  protected void printUsage(PrintStream out) {
+    ToolRunner.printGenericCommandUsage(out);
+    out.println("Usage: gridmix [-generate <MiB>] [-users URI] [-Dname=value ...] <iopath> <trace>");
+    out.println("  e.g. gridmix -generate 100m foo -");
+    out.println("Configuration parameters:");
+    out.println("   General parameters:");
+    out.printf("       %-48s : Output directory\n", GRIDMIX_OUT_DIR);
+    out.printf("       %-48s : Submitting threads\n", GRIDMIX_SUB_THR);
+    out.printf("       %-48s : Queued job desc\n", GRIDMIX_QUE_DEP);
+    out.printf("       %-48s : User resolution class\n", GRIDMIX_USR_RSV);
+    out.printf("       %-48s : Job types (%s)\n", JobCreator.GRIDMIX_JOB_TYPE, getJobTypes());
+    out.println("   Parameters related to job submission:");    
+    out.printf("       %-48s : Default queue\n",
+        GridmixJob.GRIDMIX_DEFAULT_QUEUE);
+    out.printf("       %-48s : Enable/disable using queues in trace\n",
+        GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE);
+    out.printf("       %-48s : Job submission policy (%s)\n",
+        GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, getSubmissionPolicies());
+    out.println("   Parameters specific for LOADJOB:");
+    out.printf("       %-48s : Key fraction of rec\n",
+        AvgRecordFactory.GRIDMIX_KEY_FRC);
+    out.println("   Parameters specific for SLEEPJOB:");
+    out.printf("       %-48s : Whether to ignore reduce tasks\n",
+        SleepJob.SLEEPJOB_MAPTASK_ONLY);
+    out.printf("       %-48s : Number of fake locations for map tasks\n",
+        JobCreator.SLEEPJOB_RANDOM_LOCATIONS);
+    out.printf("       %-48s : Maximum map task runtime in mili-sec\n",
+        SleepJob.GRIDMIX_SLEEP_MAX_MAP_TIME);
+    out.printf("       %-48s : Maximum reduce task runtime in mili-sec (merge+reduce)\n",
+        SleepJob.GRIDMIX_SLEEP_MAX_REDUCE_TIME);
+    out.println("   Parameters specific for STRESS submission throttling policy:");
+    out.printf("       %-48s : jobs vs task-tracker ratio\n",
+        StressJobFactory.CONF_MAX_JOB_TRACKER_RATIO);
+    out.printf("       %-48s : maps vs map-slot ratio\n",
+        StressJobFactory.CONF_OVERLOAD_MAPTASK_MAPSLOT_RATIO);
+    out.printf("       %-48s : reduces vs reduce-slot ratio\n",
+        StressJobFactory.CONF_OVERLOAD_REDUCETASK_REDUCESLOT_RATIO);
+    out.printf("       %-48s : map-slot share per job\n",
+        StressJobFactory.CONF_MAX_MAPSLOT_SHARE_PER_JOB);
+    out.printf("       %-48s : reduce-slot share per job\n",
+        StressJobFactory.CONF_MAX_REDUCESLOT_SHARE_PER_JOB);
+  }
+
+  /**
+   * Components in the pipeline must support the following operations for
+   * orderly startup and shutdown.
+   */
+  interface Component<T> {
+
+    /**
+     * Accept an item into this component from an upstream component. If
+     * shutdown or abort have been called, this may fail, depending on the
+     * semantics for the component.
+     */
+    void add(T item) throws InterruptedException;
+
+    /**
+     * Attempt to start the service.
+     */
+    void start();
+
+    /**
+     * Wait until the service completes. It is assumed that either a
+     * {@link #shutdown} or {@link #abort} has been requested.
+     */
+    void join(long millis) throws InterruptedException;
+
+    /**
+     * Shut down gracefully, finishing all pending work. Reject new requests.
+     */
+    void shutdown();
+
+    /**
+     * Shut down immediately, aborting any work in progress and discarding
+     * all pending work. It is legal to store pending work for another
+     * thread to process.
+     */
+    void abort();
+  }
+
+}
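
For completeness, a hedged sketch of driving the tool programmatically rather than via bin/hadoop. It mirrors main() above; the io path and trace file are placeholders, and a live cluster plus a Rumen job trace are assumed.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapred.gridmix.Gridmix;
    import org.apache.hadoop.util.ToolRunner;

    public class RunGridmixSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setInt(Gridmix.GRIDMIX_SUB_THR, 4);   // cap the submitting threads

        // Equivalent to: gridmix -generate 1g /gridmix/io /traces/job-trace.json
        String[] argv = { "-generate", "1g", "/gridmix/io", "/traces/job-trace.json" }; // placeholders
        int rc = ToolRunner.run(conf, new Gridmix(), argv);
        System.exit(rc);
      }
    }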

+ 307 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java

@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.util.Formatter;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Delayed;
+import java.util.concurrent.TimeUnit;
+import java.security.PrivilegedExceptionAction;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.tools.rumen.JobStory;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Synthetic job generated from a trace description.
+ */
+abstract class GridmixJob implements Callable<Job>, Delayed {
+
+  public static final String JOBNAME = "GRIDMIX";
+  public static final String ORIGNAME = "gridmix.job.name.original";
+  public static final Log LOG = LogFactory.getLog(GridmixJob.class);
+
+  private static final ThreadLocal<Formatter> nameFormat =
+    new ThreadLocal<Formatter>() {
+      @Override
+      protected Formatter initialValue() {
+        final StringBuilder sb = new StringBuilder(JOBNAME.length() + 5);
+        sb.append(JOBNAME);
+        return new Formatter(sb);
+      }
+    };
+
+  protected final int seq;
+  protected final Path outdir;
+  protected final Job job;
+  protected final JobStory jobdesc;
+  protected final UserGroupInformation ugi;
+  protected final long submissionTimeNanos;
+  private static final ConcurrentHashMap<Integer,List<InputSplit>> descCache =
+     new ConcurrentHashMap<Integer,List<InputSplit>>();
+  protected static final String GRIDMIX_JOB_SEQ = "gridmix.job.seq";
+  protected static final String GRIDMIX_USE_QUEUE_IN_TRACE = 
+      "gridmix.job-submission.use-queue-in-trace";
+  protected static final String GRIDMIX_DEFAULT_QUEUE = 
+      "gridmix.job-submission.default-queue";
+
+  private static void setJobQueue(Job job, String queue) {
+    if (queue != null)
+      job.getConfiguration().set("mapred.job.queue.name", queue);
+  }
+  
+  public GridmixJob(
+    final Configuration conf, long submissionMillis, final JobStory jobdesc,
+    Path outRoot, UserGroupInformation ugi, final int seq) throws IOException {
+    this.ugi = ugi;
+    this.jobdesc = jobdesc;
+    this.seq = seq;
+
+    ((StringBuilder)nameFormat.get().out()).setLength(JOBNAME.length());
+    try {
+      job = this.ugi.doAs(new PrivilegedExceptionAction<Job>() {
+        public Job run() throws IOException {
+          Job ret = new Job(conf, nameFormat.get().format("%05d", seq)
+              .toString());
+          ret.getConfiguration().setInt(GRIDMIX_JOB_SEQ, seq);
+          ret.getConfiguration().set(ORIGNAME,
+              null == jobdesc.getJobID() ? "<unknown>" : jobdesc.getJobID()
+                  .toString());
+          if (conf.getBoolean(GRIDMIX_USE_QUEUE_IN_TRACE, false)) {
+            setJobQueue(ret, jobdesc.getQueueName());
+          } else {
+            setJobQueue(ret, conf.get(GRIDMIX_DEFAULT_QUEUE));
+          }
+
+          return ret;
+        }
+      });
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    }
+
+    submissionTimeNanos = TimeUnit.NANOSECONDS.convert(
+        submissionMillis, TimeUnit.MILLISECONDS);
+    outdir = new Path(outRoot, "" + seq);
+  }
+
+  protected GridmixJob(
+    final Configuration conf, long submissionMillis, final String name)
+  throws IOException {
+    submissionTimeNanos = TimeUnit.NANOSECONDS.convert(
+        submissionMillis, TimeUnit.MILLISECONDS);
+    jobdesc = null;
+    outdir = null;
+    seq = -1;
+    ugi = UserGroupInformation.getCurrentUser();
+
+    try {
+      job = this.ugi.doAs(new PrivilegedExceptionAction<Job>() {
+        public Job run() throws IOException {
+          Job ret = new Job(conf, name);
+          ret.getConfiguration().setInt("gridmix.job.seq", seq);
+          setJobQueue(ret, conf.get(GRIDMIX_DEFAULT_QUEUE));
+
+          return ret;
+        }
+      });
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    }
+  }
+
+  public UserGroupInformation getUgi() {
+    return ugi;
+  }
+
+  public String toString() {
+    return job.getJobName();
+  }
+
+  public long getDelay(TimeUnit unit) {
+    return unit.convert(submissionTimeNanos - System.nanoTime(),
+        TimeUnit.NANOSECONDS);
+  }
+
+  int id() {
+    return seq;
+  }
+
+  Job getJob() {
+    return job;
+  }
+
+  JobStory getJobDesc() {
+    return jobdesc;
+  }
+
+  static void pushDescription(int seq, List<InputSplit> splits) {
+    if (null != descCache.putIfAbsent(seq, splits)) {
+      throw new IllegalArgumentException("Description exists for id " + seq);
+    }
+  }
+
+  static List<InputSplit> pullDescription(JobContext jobCtxt) {
+    return pullDescription(GridmixJob.getJobSeqId(jobCtxt));
+  }
+  
+  static List<InputSplit> pullDescription(int seq) {
+    return descCache.remove(seq);
+  }
+
+  static void clearAll() {
+    descCache.clear();
+  }
+
+  void buildSplits(FilePool inputDir) throws IOException {
+
+  }
+
+  @Override
+  public int compareTo(Delayed other) {
+    if (this == other) {
+      return 0;
+    }
+    if (other instanceof GridmixJob) {
+      final long otherNanos = ((GridmixJob)other).submissionTimeNanos;
+      if (otherNanos < submissionTimeNanos) {
+        return 1;
+      }
+      if (otherNanos > submissionTimeNanos) {
+        return -1;
+      }
+      return id() - ((GridmixJob)other).id();
+    }
+    final long diff =
+      getDelay(TimeUnit.NANOSECONDS) - other.getDelay(TimeUnit.NANOSECONDS);
+    return 0 == diff ? 0 : (diff > 0 ? 1 : -1);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    // not possible unless job is cloned; all jobs should be unique
+    return other instanceof GridmixJob && id() == ((GridmixJob)other).id();
+  }
+
+  @Override
+  public int hashCode() {
+    return id();
+  }
+
+  static int getJobSeqId(JobContext job) {
+    return job.getConfiguration().getInt(GRIDMIX_JOB_SEQ,-1);
+  }
+
+  public static class DraftPartitioner<V> extends Partitioner<GridmixKey,V> {
+    public int getPartition(GridmixKey key, V value, int numReduceTasks) {
+      return key.getPartition();
+    }
+  }
+
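+  /**
+   * Grouping comparator that places every REDUCE_SPEC key in a single group,
+   * while DATA keys fall back to their natural GridmixKey ordering.
+   */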
+  public static class SpecGroupingComparator
+      implements RawComparator<GridmixKey> {
+    private final DataInputBuffer di = new DataInputBuffer();
+    private final byte[] reset = di.getData();
+    @Override
+    public int compare(GridmixKey g1, GridmixKey g2) {
+      final byte t1 = g1.getType();
+      final byte t2 = g2.getType();
+      if (t1 == GridmixKey.REDUCE_SPEC ||
+          t2 == GridmixKey.REDUCE_SPEC) {
+        return t1 - t2;
+      }
+      assert t1 == GridmixKey.DATA;
+      assert t2 == GridmixKey.DATA;
+      return g1.compareTo(g2);
+    }
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      try {
+        final int ret;
+        di.reset(b1, s1, l1);
+        final int x1 = WritableUtils.readVInt(di);
+        di.reset(b2, s2, l2);
+        final int x2 = WritableUtils.readVInt(di);
+        final int t1 = b1[s1 + x1];
+        final int t2 = b2[s2 + x2];
+        if (t1 == GridmixKey.REDUCE_SPEC ||
+            t2 == GridmixKey.REDUCE_SPEC) {
+          ret = t1 - t2;
+        } else {
+          assert t1 == GridmixKey.DATA;
+          assert t2 == GridmixKey.DATA;
+          ret =
+            WritableComparator.compareBytes(b1, s1, x1, b2, s2, x2);
+        }
+        di.reset(reset, 0, 0);
+        return ret;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  static class RawBytesOutputFormat<K>
+      extends FileOutputFormat<K,GridmixRecord> {
+
+    @Override
+    public RecordWriter<K,GridmixRecord> getRecordWriter(
+        TaskAttemptContext job) throws IOException {
+
+      Path file = getDefaultWorkFile(job, "");
+      FileSystem fs = file.getFileSystem(job.getConfiguration());
+      final FSDataOutputStream fileOut = fs.create(file, false);
+      return new RecordWriter<K,GridmixRecord>() {
+        @Override
+        public void write(K ignored, GridmixRecord value)
+            throws IOException {
+          value.writeRandom(fileOut, value.getSize());
+        }
+        @Override
+        public void close(TaskAttemptContext ctxt) throws IOException {
+          fileOut.close();
+        }
+      };
+    }
+  }
+}

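For readers unfamiliar with the Delayed pattern GridmixJob builds on, here is a minimal
standalone sketch (illustrative names only, not part of this patch): elements of a
java.util.concurrent.DelayQueue become available from take() only once their delay has
elapsed, which is presumably how deferred submission at trace time is achieved elsewhere
in the patch.

import java.util.concurrent.DelayQueue;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;

class DelayedStub implements Delayed {
  final String name;
  final long dueNanos;   // absolute target time, analogous to submissionTimeNanos

  DelayedStub(String name, long delayMillis) {
    this.name = name;
    this.dueNanos = System.nanoTime()
        + TimeUnit.NANOSECONDS.convert(delayMillis, TimeUnit.MILLISECONDS);
  }

  @Override
  public long getDelay(TimeUnit unit) {
    // Remaining delay in the caller's unit, same shape as GridmixJob.getDelay().
    return unit.convert(dueNanos - System.nanoTime(), TimeUnit.NANOSECONDS);
  }

  @Override
  public int compareTo(Delayed other) {
    final long diff =
        getDelay(TimeUnit.NANOSECONDS) - other.getDelay(TimeUnit.NANOSECONDS);
    return diff == 0 ? 0 : (diff > 0 ? 1 : -1);
  }

  public static void main(String[] args) throws InterruptedException {
    DelayQueue<DelayedStub> queue = new DelayQueue<DelayedStub>();
    queue.add(new DelayedStub("job-00001", 200));
    queue.add(new DelayedStub("job-00000", 100));
    // take() blocks until the head is due, so elements emerge in
    // submission-time order regardless of insertion order.
    System.out.println(queue.take().name);   // job-00000
    System.out.println(queue.take().name);   // job-00001
  }
}
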
+ 87 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJobSubmissionPolicy.java

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.rumen.JobStoryProducer;
+import org.apache.hadoop.mapred.gridmix.Statistics.JobStats;
+import org.apache.hadoop.mapred.gridmix.Statistics.ClusterStats;
+
+import java.util.concurrent.CountDownLatch;
+import java.io.IOException;
+
+enum GridmixJobSubmissionPolicy {
+
+  REPLAY("REPLAY",320000) {
+    @Override
+    public JobFactory<ClusterStats> createJobFactory(
+      JobSubmitter submitter, JobStoryProducer producer, Path scratchDir,
+      Configuration conf, CountDownLatch startFlag, UserResolver userResolver)
+      throws IOException {
+      return new ReplayJobFactory(
+        submitter, producer, scratchDir, conf, startFlag,userResolver);
+    }},
+
+  STRESS("STRESS",5000) {
+    @Override
+    public JobFactory<ClusterStats> createJobFactory(
+      JobSubmitter submitter, JobStoryProducer producer, Path scratchDir,
+      Configuration conf, CountDownLatch startFlag, UserResolver userResolver)
+      throws IOException {
+      return new StressJobFactory(
+        submitter, producer, scratchDir, conf, startFlag,userResolver);
+    }},
+
+  SERIAL("SERIAL",0) {
+    @Override
+    public JobFactory<JobStats> createJobFactory(
+      JobSubmitter submitter, JobStoryProducer producer, Path scratchDir,
+      Configuration conf, CountDownLatch startFlag, UserResolver userResolver)
+      throws IOException {
+      return new SerialJobFactory(
+        submitter, producer, scratchDir, conf, startFlag,userResolver);
+    }
+  };
+
+  public static final String JOB_SUBMISSION_POLICY =
+    "gridmix.job-submission.policy";
+
+  private final String name;
+  private final int pollingInterval;
+
+  GridmixJobSubmissionPolicy(String name,int pollingInterval) {
+    this.name = name;
+    this.pollingInterval = pollingInterval;
+  }
+
+  public abstract JobFactory createJobFactory(
+    JobSubmitter submitter, JobStoryProducer producer, Path scratchDir,
+    Configuration conf, CountDownLatch startFlag, UserResolver userResolver)
+    throws IOException;
+
+  public int getPollingInterval() {
+    return pollingInterval;
+  }
+
+  public static GridmixJobSubmissionPolicy getPolicy(
+    Configuration conf, GridmixJobSubmissionPolicy defaultPolicy) {
+    String policy = conf.get(JOB_SUBMISSION_POLICY, defaultPolicy.name());
+    return valueOf(policy.toUpperCase());
+  }
+}

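A small, hypothetical driver snippet for the lookup above (it assumes placement in the
org.apache.hadoop.mapred.gridmix package because the enum is package-private, and is not
part of this patch): the configured value is matched case-insensitively, and the supplied
default applies when the key is unset.

package org.apache.hadoop.mapred.gridmix;

import org.apache.hadoop.conf.Configuration;

public class PolicyLookupExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // "replay" is upper-cased by getPolicy() before Enum.valueOf().
    conf.set(GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, "replay");

    GridmixJobSubmissionPolicy policy = GridmixJobSubmissionPolicy.getPolicy(
        conf, GridmixJobSubmissionPolicy.STRESS);   // STRESS is only the fallback here

    System.out.println(policy + " pollingInterval=" + policy.getPollingInterval());
    // prints: REPLAY pollingInterval=320000
  }
}
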
+ 258 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixKey.java

@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.WritableComparator;
+
+class GridmixKey extends GridmixRecord {
+  static final byte REDUCE_SPEC = 0;
+  static final byte DATA = 1;
+
+  static final int META_BYTES = 1;
+
+  private byte type;
+  private int partition; // NOT serialized
+  private Spec spec = new Spec();
+
+  GridmixKey() {
+    this(DATA, 1, 0L);
+  }
+  GridmixKey(byte type, int size, long seed) {
+    super(size, seed);
+    this.type = type;
+    // setting type may change pcnt random bytes
+    setSize(size);
+  }
+
+  @Override
+  public int getSize() {
+    switch (type) {
+      case REDUCE_SPEC:
+        return super.getSize() + spec.getSize() + META_BYTES;
+      case DATA:
+        return super.getSize() + META_BYTES;
+      default:
+        throw new IllegalStateException("Invalid type: " + type);
+    }
+  }
+
+  @Override
+  public void setSize(int size) {
+    switch (type) {
+      case REDUCE_SPEC:
+        super.setSize(size - (META_BYTES + spec.getSize()));
+        break;
+      case DATA:
+        super.setSize(size - META_BYTES);
+        break;
+      default:
+        throw new IllegalStateException("Invalid type: " + type);
+    }
+  }
+
+  /**
+   * Partition is not serialized.
+   */
+  public int getPartition() {
+    return partition;
+  }
+  public void setPartition(int partition) {
+    this.partition = partition;
+  }
+
+  public long getReduceInputRecords() {
+    assert REDUCE_SPEC == getType();
+    return spec.rec_in;
+  }
+  public void setReduceInputRecords(long rec_in) {
+    assert REDUCE_SPEC == getType();
+    final int origSize = getSize();
+    spec.rec_in = rec_in;
+    setSize(origSize);
+  }
+
+  public long getReduceOutputRecords() {
+    assert REDUCE_SPEC == getType();
+    return spec.rec_out;
+  }
+  public void setReduceOutputRecords(long rec_out) {
+    assert REDUCE_SPEC == getType();
+    final int origSize = getSize();
+    spec.rec_out = rec_out;
+    setSize(origSize);
+  }
+
+  public long getReduceOutputBytes() {
+    assert REDUCE_SPEC == getType();
+    return spec.bytes_out;
+  }
+  public void setReduceOutputBytes(long b_out) {
+    assert REDUCE_SPEC == getType();
+    final int origSize = getSize();
+    spec.bytes_out = b_out;
+    setSize(origSize);
+  }
+
+  public byte getType() {
+    return type;
+  }
+  public void setType(byte type) throws IOException {
+    final int origSize = getSize();
+    switch (type) {
+      case REDUCE_SPEC:
+      case DATA:
+        this.type = type;
+        break;
+      default:
+        throw new IOException("Invalid type: " + type);
+    }
+    setSize(origSize);
+  }
+
+  public void setSpec(Spec spec) {
+    assert REDUCE_SPEC == getType();
+    final int origSize = getSize();
+    this.spec.set(spec);
+    setSize(origSize);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    setType(in.readByte());
+    if (REDUCE_SPEC == getType()) {
+      spec.readFields(in);
+    }
+  }
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    final byte t = getType();
+    out.writeByte(t);
+    if (REDUCE_SPEC == t) {
+      spec.write(out);
+    }
+  }
+  int fixedBytes() {
+    return super.fixedBytes() +
+      (REDUCE_SPEC == getType() ? spec.getSize() : 0) + META_BYTES;
+  }
+  @Override
+  public int compareTo(GridmixRecord other) {
+    final GridmixKey o = (GridmixKey) other;
+    final byte t1 = getType();
+    final byte t2 = o.getType();
+    if (t1 != t2) {
+      return t1 - t2;
+    }
+    return super.compareTo(other);
+  }
+
+  /**
+   * Note that while the spec is not explicitly included, changing the spec
+   * may change its size, which will affect equality.
+   */
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    if (other != null && other.getClass() == getClass()) {
+      final GridmixKey o = ((GridmixKey)other);
+      return getType() == o.getType() && super.equals(o);
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return super.hashCode() ^ getType();
+  }
+
+  public static class Spec implements Writable {
+    long rec_in;
+    long rec_out;
+    long bytes_out;
+    public Spec() { }
+
+    public void set(Spec other) {
+      rec_in = other.rec_in;
+      bytes_out = other.bytes_out;
+      rec_out = other.rec_out;
+    }
+
+    public int getSize() {
+      return WritableUtils.getVIntSize(rec_in) +
+             WritableUtils.getVIntSize(rec_out) +
+             WritableUtils.getVIntSize(bytes_out);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      rec_in = WritableUtils.readVLong(in);
+      rec_out = WritableUtils.readVLong(in);
+      bytes_out = WritableUtils.readVLong(in);
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+      WritableUtils.writeVLong(out, rec_in);
+      WritableUtils.writeVLong(out, rec_out);
+      WritableUtils.writeVLong(out, bytes_out);
+    }
+  }
+
+  public static class Comparator extends GridmixRecord.Comparator {
+
+    private final DataInputBuffer di = new DataInputBuffer();
+    private final byte[] reset = di.getData();
+
+    public Comparator() {
+      super(GridmixKey.class);
+    }
+
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      try {
+        di.reset(b1, s1, l1);
+        final int x1 = WritableUtils.readVInt(di);
+        di.reset(b2, s2, l2);
+        final int x2 = WritableUtils.readVInt(di);
+        final int ret = (b1[s1 + x1] != b2[s2 + x2])
+          ? b1[s1 + x1] - b2[s2 + x2]
+          : super.compare(b1, s1, x1, b2, s2, x2);
+        di.reset(reset, 0, 0);
+        return ret;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    static {
+      WritableComparator.define(GridmixKey.class, new Comparator());
+    }
+  }
+}
+

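A hypothetical round-trip through the Writable methods above, assuming the
org.apache.hadoop.mapred.gridmix package (GridmixKey and its constructors are
package-private): setting a spec keeps the declared key size constant while displacing
random payload bytes, and the type and spec survive serialization.

package org.apache.hadoop.mapred.gridmix;

import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class GridmixKeyRoundTrip {
  public static void main(String[] args) throws IOException {
    GridmixKey key = new GridmixKey(GridmixKey.REDUCE_SPEC, 64, 42L);
    GridmixKey.Spec spec = new GridmixKey.Spec();
    spec.rec_in = 10;
    spec.rec_out = 5;
    spec.bytes_out = 1L << 20;
    key.setSpec(spec);                       // size stays 64; random payload shrinks

    DataOutputBuffer out = new DataOutputBuffer();
    key.write(out);                          // length vint, random payload, type byte, spec vlongs
    System.out.println(out.getLength() == key.getSize());          // true

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    GridmixKey copy = new GridmixKey();
    copy.readFields(in);

    System.out.println(copy.getType() == GridmixKey.REDUCE_SPEC);  // true
    System.out.println(copy.getReduceInputRecords());              // 10
  }
}
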
+ 215 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixRecord.java

@@ -0,0 +1,215 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.io.WritableUtils;
+
+class GridmixRecord implements WritableComparable<GridmixRecord> {
+
+  private static final int FIXED_BYTES = 1;
+  private int size = -1;
+  private long seed;
+  private final DataInputBuffer dib =
+    new DataInputBuffer();
+  private final DataOutputBuffer dob =
+    new DataOutputBuffer(Long.SIZE / Byte.SIZE);
+  private byte[] literal = dob.getData();
+
+  GridmixRecord() {
+    this(1, 0L);
+  }
+
+  GridmixRecord(int size, long seed) {
+    this.seed = seed;
+    setSizeInternal(size);
+  }
+
+  public int getSize() {
+    return size;
+  }
+
+  public void setSize(int size) {
+    setSizeInternal(size);
+  }
+
+  private void setSizeInternal(int size) {
+    this.size = Math.max(1, size);
+    try {
+      seed = maskSeed(seed, this.size);
+      dob.reset();
+      dob.writeLong(seed);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public final void setSeed(long seed) {
+    this.seed = seed;
+  }
+
+  /** Marsaglia, 2003. */
+  long nextRand(long x) {
+    x ^= (x << 13);
+    x ^= (x >>> 7);
+    return (x ^= (x << 17));
+  }
+
+  public void writeRandom(DataOutput out, final int size) throws IOException {
+    long tmp = seed;
+    out.writeLong(tmp);
+    int i = size - (Long.SIZE / Byte.SIZE);
+    while (i > Long.SIZE / Byte.SIZE - 1) {
+      tmp = nextRand(tmp);
+      out.writeLong(tmp);
+      i -= Long.SIZE / Byte.SIZE;
+    }
+    for (tmp = nextRand(tmp); i > 0; --i) {
+      out.writeByte((int)(tmp & 0xFF));
+      tmp >>>= Byte.SIZE;
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    size = WritableUtils.readVInt(in);
+    int payload = size - WritableUtils.getVIntSize(size);
+    if (payload > Long.SIZE / Byte.SIZE) {
+      seed = in.readLong();
+      payload -= Long.SIZE / Byte.SIZE;
+    } else {
+      Arrays.fill(literal, (byte)0);
+      in.readFully(literal, 0, payload);
+      dib.reset(literal, 0, literal.length);
+      seed = dib.readLong();
+      payload = 0;
+    }
+    final int vBytes = in.skipBytes(payload);
+    if (vBytes != payload) {
+      throw new EOFException("Expected " + payload + ", read " + vBytes);
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    // data bytes including vint encoding
+    WritableUtils.writeVInt(out, size);
+    final int payload = size - WritableUtils.getVIntSize(size);
+    if (payload > Long.SIZE / Byte.SIZE) {
+      writeRandom(out, payload);
+    } else if (payload > 0) {
+      out.write(literal, 0, payload);
+    }
+  }
+
+  @Override
+  public int compareTo(GridmixRecord other) {
+    return compareSeed(other.seed,
+        Math.max(0, other.getSize() - other.fixedBytes()));
+  }
+
+  int fixedBytes() {
+    // min vint size
+    return FIXED_BYTES;
+  }
+
+  private static long maskSeed(long sd, int sz) {
+    // Don't use fixedBytes here; subclasses will set intended random len
+    if (sz <= FIXED_BYTES) {
+      sd = 0L;
+    } else if (sz < Long.SIZE / Byte.SIZE + FIXED_BYTES) {
+      final int tmp = sz - FIXED_BYTES;
+      final long mask = (1L << (Byte.SIZE * tmp)) - 1;
+      sd &= mask << (Byte.SIZE * (Long.SIZE / Byte.SIZE - tmp));
+    }
+    return sd;
+  }
+
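+  /**
+   * Compare the random byte streams the two seeds would produce (the same
+   * xorshift chain used by writeRandom) without materializing them; ties
+   * are broken by payload length.
+   */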
+  int compareSeed(long jSeed, int jSize) {
+    final int iSize = Math.max(0, getSize() - fixedBytes());
+    final int seedLen = Math.min(iSize, jSize) + FIXED_BYTES;
+    jSeed = maskSeed(jSeed, seedLen);
+    long iSeed = maskSeed(seed, seedLen);
+    final int cmplen = Math.min(iSize, jSize);
+    for (int i = 0; i < cmplen; i += Byte.SIZE) {
+      final int k = cmplen - i;
+      for (long j = Long.SIZE - Byte.SIZE;
+          j >= Math.max(0, Long.SIZE / Byte.SIZE - k) * Byte.SIZE;
+          j -= Byte.SIZE) {
+        final int xi = (int)((iSeed >>> j) & 0xFFL);
+        final int xj = (int)((jSeed >>> j) & 0xFFL);
+        if (xi != xj) {
+          return xi - xj;
+        }
+      }
+      iSeed = nextRand(iSeed);
+      jSeed = nextRand(jSeed);
+    }
+    return iSize - jSize;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    if (other != null && other.getClass() == getClass()) {
+      final GridmixRecord o = ((GridmixRecord)other);
+      return getSize() == o.getSize() && seed == o.seed;
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    return (int)(seed * getSize());
+  }
+
+  public static class Comparator extends WritableComparator {
+
+    public Comparator() {
+      super(GridmixRecord.class);
+    }
+
+    public Comparator(Class<? extends WritableComparable<?>> sub) {
+      super(sub);
+    }
+
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      int n1 = WritableUtils.decodeVIntSize(b1[s1]);
+      int n2 = WritableUtils.decodeVIntSize(b2[s2]);
+      n1 -= WritableUtils.getVIntSize(n1);
+      n2 -= WritableUtils.getVIntSize(n2);
+      return compareBytes(b1, s1+n1, l1-n1, b2, s2+n2, l2-n2);
+    }
+
+    static {
+      WritableComparator.define(GridmixRecord.class, new Comparator());
+    }
+  }
+
+}

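A quick, hypothetical check of the contract above (same package assumption as before,
since GridmixRecord is package-private): write() emits exactly getSize() bytes, counting
the vint length prefix, and two records built from the same size and seed are equal.

package org.apache.hadoop.mapred.gridmix;

import java.io.IOException;

import org.apache.hadoop.io.DataOutputBuffer;

public class GridmixRecordSizeCheck {
  public static void main(String[] args) throws IOException {
    GridmixRecord rec = new GridmixRecord(100, 42L);

    DataOutputBuffer out = new DataOutputBuffer();
    rec.write(out);
    // The vint prefix plus the generated payload add up to the declared size.
    System.out.println(out.getLength() == rec.getSize());             // true

    // Same size and seed means the same byte stream, so the records are equal.
    GridmixRecord same = new GridmixRecord(100, 42L);
    System.out.println(rec.equals(same) && rec.compareTo(same) == 0); // true
  }
}
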
+ 126 - 0
src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/InputStriper.java

@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Given a {@link FilePool}, obtain a set of files capable of satisfying
+ * a full set of splits, then iterate over each source to fill the request.
+ */
+class InputStriper {
+  public static final Log LOG = LogFactory.getLog(InputStriper.class);
+  int idx;
+  long currentStart;
+  FileStatus current;
+  final List<FileStatus> files = new ArrayList<FileStatus>();
+
+  /**
+   * @param inputDir Pool from which files are requested.
+   * @param mapBytes Sum of all expected split requests.
+   */
+  InputStriper(FilePool inputDir, long mapBytes)
+      throws IOException {
+    final long inputBytes = inputDir.getInputFiles(mapBytes, files);
+    if (mapBytes > inputBytes) {
+      LOG.warn("Using " + inputBytes + "/" + mapBytes + " bytes");
+    }
+    if (files.isEmpty() && mapBytes > 0) {
+      throw new IOException("Failed to satisfy request for " + mapBytes);
+    }
+    current = files.isEmpty() ? null : files.get(0);
+  }
+
+  /**
+   * @param inputDir Pool used to resolve block locations.
+   * @param bytes Target byte count
+   * @param nLocs Number of block locations per split.
+   * @return A split covering the requested bytes, with locations weighted
+   *         toward the hosts serving the dominating proportion of those bytes.
+   */
+  CombineFileSplit splitFor(FilePool inputDir, long bytes, int nLocs)
+      throws IOException {
+    final ArrayList<Path> paths = new ArrayList<Path>();
+    final ArrayList<Long> start = new ArrayList<Long>();
+    final ArrayList<Long> length = new ArrayList<Long>();
+    final HashMap<String,Double> sb = new HashMap<String,Double>();
+    do {
+      paths.add(current.getPath());
+      start.add(currentStart);
+      final long fromFile = Math.min(bytes, current.getLen() - currentStart);
+      length.add(fromFile);
+      for (BlockLocation loc :
+          inputDir.locationsFor(current, currentStart, fromFile)) {
+        final double tedium = loc.getLength() / (1.0 * bytes);
+        for (String l : loc.getHosts()) {
+          Double j = sb.get(l);
+          if (null == j) {
+            sb.put(l, tedium);
+          } else {
+            sb.put(l, j.doubleValue() + tedium);
+          }
+        }
+      }
+      currentStart += fromFile;
+      bytes -= fromFile;
+      if (current.getLen() - currentStart == 0) {
+        current = files.get(++idx % files.size());
+        currentStart = 0;
+      }
+    } while (bytes > 0);
+    final ArrayList<Entry<String,Double>> sort =
+      new ArrayList<Entry<String,Double>>(sb.entrySet());
+    Collections.sort(sort, hostRank);
+    final String[] hosts = new String[Math.min(nLocs, sort.size())];
+    for (int i = 0; i < nLocs && i < sort.size(); ++i) {
+      hosts[i] = sort.get(i).getKey();
+    }
+    return new CombineFileSplit(paths.toArray(new Path[0]),
+        toLongArray(start), toLongArray(length), hosts);
+  }
+
+  private long[] toLongArray(final ArrayList<Long> sigh) {
+    final long[] ret = new long[sigh.size()];
+    for (int i = 0; i < ret.length; ++i) {
+      ret[i] = sigh.get(i);
+    }
+    return ret;
+  }
+
+  static final Comparator<Entry<String,Double>> hostRank =
+    new Comparator<Entry<String,Double>>() {
+      public int compare(Entry<String,Double> a, Entry<String,Double> b) {
+          final double va = a.getValue();
+          final double vb = b.getValue();
+          return va > vb ? -1 : va < vb ? 1 : 0;
+        }
+    };
+}

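A standalone sketch, with made-up host names and byte counts, of the host-weighting step
in splitFor above: each host accumulates the fraction of the split's bytes it serves,
hosts are sorted by descending weight, and the first nLocs become the split's locations.

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HostRankSketch {
  public static void main(String[] args) {
    final long splitBytes = 128L << 20;                      // 128 MB split
    Map<String, Double> weight = new HashMap<String, Double>();
    accumulate(weight, "node-a", 96L << 20, splitBytes);     // 0.75 of the split
    accumulate(weight, "node-b", 64L << 20, splitBytes);     // 0.50
    accumulate(weight, "node-c", 32L << 20, splitBytes);     // 0.25

    List<Map.Entry<String, Double>> ranked =
        new ArrayList<Map.Entry<String, Double>>(weight.entrySet());
    Collections.sort(ranked, new Comparator<Map.Entry<String, Double>>() {
      public int compare(Map.Entry<String, Double> a, Map.Entry<String, Double> b) {
        return Double.compare(b.getValue(), a.getValue());   // heaviest host first
      }
    });

    int nLocs = 2;                                           // keep the top two hosts
    for (int i = 0; i < nLocs && i < ranked.size(); ++i) {
      System.out.println(ranked.get(i).getKey());            // node-a, then node-b
    }
  }

  // Mirror of the block-location loop above: add this block's share of the
  // split to the running weight of each host that serves it.
  static void accumulate(Map<String, Double> weight, String host,
                         long blockBytes, long splitBytes) {
    final double frac = blockBytes / (double) splitBytes;
    Double prev = weight.get(host);
    weight.put(host, prev == null ? frac : prev + frac);
  }
}
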
Some files were not shown because too many files changed in this diff.