svn merge -r 1097011:1099333 from branch-0.20-security-203.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.20-security-204@1101640 13f79535-47bb-0310-9956-ffa450edef68
Owen O'Malley, 14 years ago
parent
commit
f923f55563
35 changed files with 2771 additions and 324 deletions
  1. +34 -3      CHANGES.txt
  2. +0 -0       conf/hadoop-metrics2.properties
  3. +4 -3       conf/log4j.properties
  4. +1 -1       ivy/libraries.properties
  5. +1 -1       src/ant/org/apache/hadoop/ant/condition/DfsBaseConditional.java
  6. +7 -1       src/c++/libhdfs/hdfs.c
  7. +0 -2       src/c++/libhdfs/hdfsJniHelper.h
  8. +2 -1       src/contrib/build-contrib.xml
  9. +2 -2       src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/launch/HadoopApplicationLaunchShortcut.java
  10. +2 -2      src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/servers/RunOnHadoopWizard.java
  11. +59 -45    src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairSchedulerServlet.java
  12. +7 -1      src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java
  13. +11 -2     src/contrib/streaming/src/test/org/apache/hadoop/streaming/StderrApp.java
  14. +101 -0    src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingStatus.java
  15. +4 -3      src/core/org/apache/hadoop/conf/Configuration.java
  16. +882 -0    src/core/org/apache/hadoop/fs/HarFileSystem.java
  17. +8 -0      src/core/org/apache/hadoop/io/compress/CodecPool.java
  18. +35 -0     src/core/org/apache/hadoop/io/compress/DoNotPool.java
  19. +2 -0      src/core/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java
  20. +2 -2      src/core/overview.html
  21. +456 -198  src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
  22. +2 -2      src/docs/src/documentation/content/xdocs/fair_scheduler.xml
  23. +2 -2      src/docs/src/documentation/content/xdocs/single_node_setup.xml
  24. +1 -1      src/examples/org/apache/hadoop/examples/AggregateWordCount.java
  25. +1 -1      src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
  26. +2 -2      src/examples/org/apache/hadoop/examples/dancing/package.html
  27. +1 -1      src/examples/org/apache/hadoop/examples/terasort/TeraGen.java
  28. +1 -1      src/examples/org/apache/hadoop/examples/terasort/TeraSort.java
  29. +1 -1      src/examples/org/apache/hadoop/examples/terasort/TeraValidate.java
  30. +32 -9     src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java
  31. +240 -0    src/test/org/apache/hadoop/fs/TestHarFileSystem.java
  32. +164 -0    src/test/org/apache/hadoop/io/compress/TestCodec.java
  33. +12 -22    src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java
  34. +0 -15     src/test/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java
  35. +692 -0    src/tools/org/apache/hadoop/tools/HadoopArchives.java

+ 34 - 3
CHANGES.txt

@@ -40,9 +40,6 @@ Release 0.20.204.0 - unreleased
     HDFS-1258. Clearing namespace quota on "/" corrupts fs image.  
     (Aaron T. Myers via szetszwo)

-    HADOOP-7215. RPC clients must use network interface corresponding to 
-    the host in the client's kerberos principal key. (suresh)
-
     HDFS-1189. Quota counts missed between clear quota and set quota.
     (John George via szetszwo)

@@ -75,6 +72,26 @@ Release 0.20.204.0 - unreleased

 Release 0.20.203.0 - unreleased

+    MAPREDUCE-1280. Update Eclipse plugin to the new eclipse.jdt API.
+    (Alex Kozlov via szetszwo)
+
+    HADOOP-7259. Contrib modules should include the build.properties from
+    the enclosing hadoop directory. (omalley)
+
+    HADOOP-7253. Update the default configuration to fix security audit log
+    and metrics2 property configuration warnings. (omalley)
+
+    HADOOP-7247. Update documentation to match current jar names. (omalley)
+
+    HADOOP-7246. Update the log4j configuration to match the EventCounter
+    package. (Luke Lu via omalley)
+
+    HADOOP-7143. Restore HadoopArchives. (Joep Rottinghuis via omalley)
+
+    MAPREDUCE-2316. Updated CapacityScheduler documentation. (acmurthy) 
+
+    HADOOP-7243. Fix contrib unit tests missing dependencies. (omalley)
+
     HADOOP-7190. Add metrics v1 back for backwards compatibility. (omalley)

     MAPREDUCE-2360. Remove stripping of scheme, authority from submit dir in 
@@ -125,8 +142,13 @@ Release 0.20.203.0 - unreleased
     HADOOP-6879. Provide SSH based (Jsch) remote execution API for system
     tests. (cos)

+    HADOOP-7215. RPC clients must use network interface corresponding to 
+    the host in the client's kerberos principal key. (suresh)
+
     HADOOP-7232. Fix Javadoc warnings. (omalley)

+    HADOOP-7258. The Gzip codec should not return null decompressors. (omalley)
+
 Release 0.20.202.0 - unreleased

     MAPREDUCE-2355. Add a configuration knob 
@@ -1764,6 +1786,15 @@ Release 0.20.2 - Unreleased
     MAPREDUCE-1163. Remove unused, hard-coded paths from libhdfs. (Allen
     Wittenauer via cdouglas)

+    HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+    GzipCodec. (Aaron Kimball via cdouglas)
+
+    HADOOP-6269. Fix threading issue with defaultResource in Configuration.
+    (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5759. Fix for  IllegalArgumentException when CombineFileInputFormat
+    is used as job InputFormat. (Amareshwari Sriramadasu via zshao)
+
 Release 0.20.1 - 2009-09-01

   INCOMPATIBLE CHANGES

+ 0 - 0
conf/hadoop-metrics2.properties.example → conf/hadoop-metrics2.properties


+ 4 - 3
conf/log4j.properties

@@ -79,7 +79,8 @@ log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
 log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
 log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
 #new logger
-log4j.category.SecurityLogger=INFO,DRFAS
+log4j.logger.SecurityLogger=OFF,console
+log4j.logger.SecurityLogger.additivity=false

 #
 # Rolling File Appender
@@ -100,7 +101,7 @@ log4j.category.SecurityLogger=INFO,DRFAS
 # FSNamesystem Audit logging
 # All audit events are logged at INFO level
 #
-log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN

 # Custom Logging levels

@@ -117,7 +118,7 @@ log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
 # Event Counter Appender
 # Sends counts of logging messages at different severity levels to Hadoop Metrics.
 #
-log4j.appender.EventCounter=org.apache.hadoop.log.EventCounter
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter

 #
 # Job Summary Appender

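Side note on the log4j keys above: log4j resolves a property of the form log4j.logger.<name> against the name a class hands to the logging factory, which is why the audit key had to follow FSNamesystem into the org.apache.hadoop.hdfs.server.namenode package. A small, hedged Java sketch of that naming convention; the class and message below are illustrative and not taken from the Hadoop source:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class AuditLogNamingExample {
  // The logger name is the fully qualified class name plus ".audit", so the
  // log4j key must spell out the same package path as the class itself.
  public static final Log AUDIT_LOG =
      LogFactory.getLog(AuditLogNamingExample.class.getName() + ".audit");

  public static void main(String[] args) {
    // Controlled by a key like: log4j.logger.AuditLogNamingExample.audit=WARN
    AUDIT_LOG.info("cmd=open\tsrc=/some/path\tperm=null");
  }
}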
+ 1 - 1
ivy/libraries.properties

@@ -14,7 +14,7 @@
 #It drives ivy and the generation of a maven POM

 # This is the version of hadoop we are generating
-hadoop.version=0.20.0
+hadoop.version=0.20.203.0
 hadoop-gpl-compression.version=0.1.0

 #These are the versions of our dependencies (in alphabetical order)

+ 1 - 1
src/ant/org/apache/hadoop/ant/condition/DfsBaseConditional.java

@@ -56,7 +56,7 @@ public abstract class DfsBaseConditional extends org.apache.hadoop.ant.DfsTask

   protected int postCmd(int exit_code) {
     exit_code = super.postCmd(exit_code);
-    result = exit_code == 1;
+    result = exit_code == 0;
     return exit_code;
   }


+ 7 - 1
src/c++/libhdfs/hdfs.c

@@ -2022,12 +2022,18 @@ hdfsFileInfo *hdfsGetPathInfo(hdfsFS fs, const char* path)

 void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries)
 {
-    //Free the mName
+    //Free the mName, mOwner, and mGroup
     int i;
     for (i=0; i < numEntries; ++i) {
         if (hdfsFileInfo[i].mName) {
             free(hdfsFileInfo[i].mName);
         }
+        if (hdfsFileInfo[i].mOwner) {
+            free(hdfsFileInfo[i].mOwner);
+        }
+        if (hdfsFileInfo[i].mGroup) {
+            free(hdfsFileInfo[i].mGroup);
+        }
     }

     //Free entire block

+ 0 - 2
src/c++/libhdfs/hdfsJniHelper.h

@@ -30,8 +30,6 @@

 #define PATH_SEPARATOR ':'

-#define USER_CLASSPATH "/home/y/libexec/hadoop/conf:/home/y/libexec/hadoop/lib/hadoop-0.1.0.jar"
-

 /** Denote the method we want to invoke as STATIC or INSTANCE */
 typedef enum {

+ 2 - 1
src/contrib/build-contrib.xml

@@ -23,13 +23,14 @@

   <property name="name" value="${ant.project.name}"/>
   <property name="root" value="${basedir}"/>
+  <property name="hadoop.root" location="${root}/../../../"/>

   <!-- Load all the default properties, and any the user wants    -->
   <!-- to contribute (without having to type -D or edit this file -->
   <property file="${user.home}/${name}.build.properties" />
   <property file="${root}/build.properties" />
+  <property file="${hadoop.root}/build.properties" />

-  <property name="hadoop.root" location="${root}/../../../"/>
   <property name="src.dir"  location="${root}/src/java"/>
   <property name="src.test" location="${root}/src/test"/>
   <!-- Property added for contrib system tests -->

+ 2 - 2
src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/launch/HadoopApplicationLaunchShortcut.java

@@ -32,7 +32,7 @@ import org.eclipse.debug.core.ILaunchConfigurationWorkingCopy;
 import org.eclipse.jdt.core.IJavaProject;
 import org.eclipse.jdt.core.IType;
 import org.eclipse.jdt.core.JavaCore;
-import org.eclipse.jdt.internal.debug.ui.launcher.JavaApplicationLaunchShortcut;
+import org.eclipse.jdt.debug.ui.launchConfigurations.JavaApplicationLaunchShortcut;
 import org.eclipse.jdt.launching.IJavaLaunchConfigurationConstants;
 import org.eclipse.jdt.launching.IRuntimeClasspathEntry;
 import org.eclipse.jdt.launching.JavaRuntime;
@@ -64,7 +64,7 @@ public class HadoopApplicationLaunchShortcut extends
     // Find an existing or create a launch configuration (Standard way)
     ILaunchConfiguration iConf =
         super.findLaunchConfiguration(type, configType);
-
+    if (iConf == null) iConf = super.createConfiguration(type);
     ILaunchConfigurationWorkingCopy iConfWC;
     try {
       /*

+ 2 - 2
src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/servers/RunOnHadoopWizard.java

@@ -159,9 +159,9 @@ public class RunOnHadoopWizard extends Wizard {

     // Write it to the disk file
     try {
-      // File confFile = File.createTempFile("hadoop-site-", ".xml",
+      // File confFile = File.createTempFile("core-site-", ".xml",
       // confDir);
-      File confFile = new File(confDir, "hadoop-site.xml");
+      File confFile = new File(confDir, "core-site.xml");
       FileOutputStream fos = new FileOutputStream(confFile);
       conf.writeXml(fos);
       fos.close();

+ 59 - 45
src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairSchedulerServlet.java

@@ -18,7 +18,9 @@

 package org.apache.hadoop.mapred;

+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
@@ -122,7 +124,12 @@ public class FairSchedulerServlet extends HttpServlet {
     }
     // Print out the normal response
     response.setContentType("text/html");
-    PrintWriter out = new PrintWriter(response.getOutputStream());
+
+    // Because the client may read arbitrarily slow, and we hold locks while
+    // the servlet output, we want to write to our own buffer which we know
+    // won't block.
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    PrintWriter out = new PrintWriter(baos);
     String hostname = StringUtils.simpleHostname(
         jobTracker.getJobTrackerMachine());
     out.print("<html><head>");
@@ -137,6 +144,11 @@ public class FairSchedulerServlet extends HttpServlet {
     showAdminForm(out, advancedView);
     out.print("</body></html>\n");
     out.close();
+
+    // Flush our buffer to the real servlet output
+    OutputStream servletOut = response.getOutputStream();
+    baos.writeTo(servletOut);
+    servletOut.close();
   }

   /**
@@ -206,51 +218,53 @@ public class FairSchedulerServlet extends HttpServlet {
     out.print("<th>Finished</th><th>Running</th><th>Fair Share</th>" +
         (advancedView ? "<th>Weight</th><th>Deficit</th><th>minReduces</th>" : ""));
     out.print("</tr>\n");
-    Collection<JobInProgress> runningJobs = jobTracker.getRunningJobs();
-    synchronized (scheduler) {
-      for (JobInProgress job: runningJobs) {
-        JobProfile profile = job.getProfile();
-        JobInfo info = scheduler.infos.get(job);
-        if (info == null) { // Job finished, but let's show 0's for info
-          info = new JobInfo();
-        }
-        out.print("<tr>\n");
-        out.printf("<td>%s</td>\n", DATE_FORMAT.format(
-            new Date(job.getStartTime())));
-        out.printf("<td><a href=\"jobdetails.jsp?jobid=%s\">%s</a></td>",
-            profile.getJobID(), profile.getJobID());
-        out.printf("<td>%s</td>\n", profile.getUser());
-        out.printf("<td>%s</td>\n", profile.getJobName());
-        out.printf("<td>%s</td>\n", generateSelect(
-            scheduler.getPoolManager().getPoolNames(),
-            scheduler.getPoolManager().getPoolName(job),
-            "/scheduler?setPool=<CHOICE>&jobid=" + profile.getJobID() +
-            (advancedView ? "&advanced" : "")));
-        out.printf("<td>%s</td>\n", generateSelect(
-            Arrays.asList(new String[]
-                {"VERY_LOW", "LOW", "NORMAL", "HIGH", "VERY_HIGH"}),
-            job.getPriority().toString(),
-            "/scheduler?setPriority=<CHOICE>&jobid=" + profile.getJobID() +
-            (advancedView ? "&advanced" : "")));
-        out.printf("<td>%d / %d</td><td>%d</td><td>%8.1f</td>\n",
-            job.finishedMaps(), job.desiredMaps(), info.runningMaps,
-            info.mapFairShare);
-        if (advancedView) {
-          out.printf("<td>%8.1f</td>\n", info.mapWeight);
-          out.printf("<td>%s</td>\n", info.neededMaps > 0 ?
-              (info.mapDeficit / 1000) + "s" : "--");
-          out.printf("<td>%d</td>\n", info.minMaps);
-        }
-        out.printf("<td>%d / %d</td><td>%d</td><td>%8.1f</td>\n",
-            job.finishedReduces(), job.desiredReduces(), info.runningReduces,
-            info.reduceFairShare);
-        if (advancedView) {
-          out.printf("<td>%8.1f</td>\n", info.reduceWeight);
-          out.printf("<td>%s</td>\n", info.neededReduces > 0 ?
-              (info.reduceDeficit / 1000) + "s" : "--");
-          out.printf("<td>%d</td>\n", info.minReduces);
+    synchronized (jobTracker) {
+      Collection<JobInProgress> runningJobs = jobTracker.getRunningJobs();
+      synchronized (scheduler) {
+        for (JobInProgress job: runningJobs) {
+          JobProfile profile = job.getProfile();
+          JobInfo info = scheduler.infos.get(job);
+          if (info == null) { // Job finished, but let's show 0's for info
+            info = new JobInfo();
+          }
+          out.print("<tr>\n");
+          out.printf("<td>%s</td>\n", DATE_FORMAT.format(
+                       new Date(job.getStartTime())));
+          out.printf("<td><a href=\"jobdetails.jsp?jobid=%s\">%s</a></td>",
+                     profile.getJobID(), profile.getJobID());
+          out.printf("<td>%s</td>\n", profile.getUser());
+          out.printf("<td>%s</td>\n", profile.getJobName());
+          out.printf("<td>%s</td>\n", generateSelect(
+                       scheduler.getPoolManager().getPoolNames(),
+                       scheduler.getPoolManager().getPoolName(job),
+                       "/scheduler?setPool=<CHOICE>&jobid=" + profile.getJobID() +
+                       (advancedView ? "&advanced" : "")));
+          out.printf("<td>%s</td>\n", generateSelect(
+                       Arrays.asList(new String[]
+                         {"VERY_LOW", "LOW", "NORMAL", "HIGH", "VERY_HIGH"}),
+                       job.getPriority().toString(),
+                       "/scheduler?setPriority=<CHOICE>&jobid=" + profile.getJobID() +
+                       (advancedView ? "&advanced" : "")));
+          out.printf("<td>%d / %d</td><td>%d</td><td>%8.1f</td>\n",
+                     job.finishedMaps(), job.desiredMaps(), info.runningMaps,
+                     info.mapFairShare);
+          if (advancedView) {
+            out.printf("<td>%8.1f</td>\n", info.mapWeight);
+            out.printf("<td>%s</td>\n", info.neededMaps > 0 ?
+                       (info.mapDeficit / 1000) + "s" : "--");
+            out.printf("<td>%d</td>\n", info.minMaps);
+          }
+          out.printf("<td>%d / %d</td><td>%d</td><td>%8.1f</td>\n",
+                     job.finishedReduces(), job.desiredReduces(), info.runningReduces,
+                     info.reduceFairShare);
+          if (advancedView) {
+            out.printf("<td>%8.1f</td>\n", info.reduceWeight);
+            out.printf("<td>%s</td>\n", info.neededReduces > 0 ?
+                       (info.reduceDeficit / 1000) + "s" : "--");
+            out.printf("<td>%d</td>\n", info.minReduces);
+          }
+          out.print("</tr>\n");
          }
-        out.print("</tr>\n");
       }
     }
     out.print("</table>\n");

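The FairSchedulerServlet hunk above renders the whole page into an in-memory buffer while the scheduler and JobTracker locks are held, and only afterwards copies the bytes to the servlet output stream, so a slow client can no longer stall lock holders. A minimal sketch of that buffer-then-flush pattern using plain java.io types instead of the servlet API; the class and names here are illustrative, not Hadoop code:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;

public class BufferedPageWriter {
  private final Object schedulerLock = new Object();

  public void writePage(OutputStream slowClient) throws IOException {
    // Render under the lock, but only into memory; nothing here can block on I/O.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintWriter out = new PrintWriter(baos);
    synchronized (schedulerLock) {
      out.print("<html><body>scheduler state snapshot</body></html>\n");
    }
    out.close(); // flushes the PrintWriter into the byte buffer

    // Copy to the possibly slow destination with no shared locks held.
    baos.writeTo(slowClient);
    slowClient.close();
  }
}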
+ 7 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java

@@ -385,7 +385,11 @@ public abstract class PipeMapRed {
           if (now-lastStdoutReport > reporterOutDelay_) {
             lastStdoutReport = now;
             String hline = "Records R/W=" + numRecRead_ + "/" + numRecWritten_;
-            reporter.setStatus(hline);
+            if (!processProvidedStatus_) {
+              reporter.setStatus(hline);
+            } else {
+              reporter.progress();
+            }
             logprintln(hline);
             logflush();
           }
@@ -446,6 +450,7 @@ public abstract class PipeMapRed {
             if (matchesCounter(lineStr)) {
               incrCounter(lineStr);
             } else if (matchesStatus(lineStr)) {
+              processProvidedStatus_ = true;
               setStatus(lineStr);
             } else {
               LOG.warn("Cannot parse reporter line: " + lineStr);
@@ -671,4 +676,5 @@ public abstract class PipeMapRed {
   String LOGNAME;
   PrintStream log_;

+  volatile boolean processProvidedStatus_ = false;
 }

+ 11 - 2
src/contrib/streaming/src/test/org/apache/hadoop/streaming/StderrApp.java

@@ -32,8 +32,16 @@ public class StderrApp
    * postWriteLines to stderr.
    */
   public static void go(int preWriteLines, int sleep, int postWriteLines) throws IOException {
+    go(preWriteLines, sleep, postWriteLines, false);
+  }
+  
+  public static void go(int preWriteLines, int sleep, int postWriteLines, boolean status) throws IOException {
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
     String line;
+    
+    if (status) {
+      System.err.println("reporter:status:starting echo");
+    }      

     while (preWriteLines > 0) {
       --preWriteLines;
@@ -57,13 +65,14 @@ public class StderrApp

   public static void main(String[] args) throws IOException {
     if (args.length < 3) {
-      System.err.println("Usage: StderrApp PREWRITE SLEEP POSTWRITE");
+      System.err.println("Usage: StderrApp PREWRITE SLEEP POSTWRITE [STATUS]");
       return;
     }
     int preWriteLines = Integer.parseInt(args[0]);
     int sleep = Integer.parseInt(args[1]);
     int postWriteLines = Integer.parseInt(args[2]);
+    boolean status = args.length > 3 ? Boolean.parseBoolean(args[3]) : false;
     
-    go(preWriteLines, sleep, postWriteLines);
+    go(preWriteLines, sleep, postWriteLines, status);
   }
 }

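StderrApp's new status flag exercises the streaming status protocol that PipeMapRed parses above: a task sets its status by printing a line of the form reporter:status:<message> to stderr (counters use a similar reporter:counter: form). A hedged sketch of a trivial streaming mapper that reports its own status; the class name is made up:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

public class StatusReportingMapper {
  public static void main(String[] args) throws IOException {
    // PipeMapRed picks this line up and sets the task status (see the patch above).
    System.err.println("reporter:status:starting echo");

    // Identity mapper: copy stdin records to stdout unchanged.
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    String line;
    while ((line = in.readLine()) != null) {
      System.out.println(line);
    }
    System.err.println("reporter:status:done");
  }
}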
+ 101 - 0
src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingStatus.java

@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.streaming;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.File;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapred.TaskReport;
+
+/**
+ * Tests for the ability of a streaming task to set the status
+ * by writing "reporter:status:" lines to stderr. Uses MiniMR
+ * since the local jobtracker doesn't track status.
+ */
+public class TestStreamingStatus extends TestCase {
+  private static String TEST_ROOT_DIR =
+    new File(System.getProperty("test.build.data","/tmp"))
+    .toURI().toString().replace(' ', '+');
+  protected String INPUT_FILE = TEST_ROOT_DIR + "/input.txt";
+  protected String OUTPUT_DIR = TEST_ROOT_DIR + "/out";
+  protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
+  protected String map = StreamUtil.makeJavaCommand(StderrApp.class, new String[]{"3", "0", "0", "true"});
+
+  protected String[] genArgs(int jobtrackerPort) {
+    return new String[] {
+      "-input", INPUT_FILE,
+      "-output", OUTPUT_DIR,
+      "-mapper", map,
+      "-jobconf", "mapred.map.tasks=1",
+      "-jobconf", "mapred.reduce.tasks=0",      
+      "-jobconf", "keep.failed.task.files=true",
+      "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"),
+      "-jobconf", "mapred.job.tracker=localhost:"+jobtrackerPort,
+      "-jobconf", "fs.default.name=file:///"
+    };
+  }
+  
+  public void makeInput(FileSystem fs) throws IOException {
+    Path inFile = new Path(INPUT_FILE);
+    DataOutputStream file = fs.create(inFile);
+    file.writeBytes(input);
+    file.close();
+  }
+
+  public void clean(FileSystem fs) {
+    try {
+      Path outDir = new Path(OUTPUT_DIR);
+      fs.delete(outDir, true);
+    } catch (Exception e) {}
+    try {
+      Path inFile = new Path(INPUT_FILE);    
+      fs.delete(inFile, false);
+    } catch (Exception e) {}
+  }
+  
+  public void testStreamingStatus() throws Exception {
+    MiniMRCluster mr = null;
+    FileSystem fs = null;
+    try {
+      mr = new MiniMRCluster(1, "file:///", 3);
+
+      Path inFile = new Path(INPUT_FILE);
+      fs = inFile.getFileSystem(mr.createJobConf());
+      clean(fs);
+      makeInput(fs);
+      
+      StreamJob job = new StreamJob();
+      int failed = job.run(genArgs(mr.getJobTrackerPort()));
+      assertEquals(0, failed);
+
+      TaskReport[] reports = job.jc_.getMapTaskReports(job.jobId_);
+      assertEquals(1, reports.length);
+      assertEquals("starting echo", reports[0].getState());
+    } finally {
+      if (fs != null) { clean(fs); }
+      if (mr != null) { mr.shutdown(); }
+    }
+  }
+}

+ 4 - 3
src/core/org/apache/hadoop/conf/Configuration.java

@@ -43,6 +43,7 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.WeakHashMap;
+import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

@@ -165,8 +166,8 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
    * List of default Resources. Resources are loaded in the order of the list 
    * entries
    */
-  private static final ArrayList<String> defaultResources = 
-    new ArrayList<String>();
+  private static final CopyOnWriteArrayList<String> defaultResources =
+    new CopyOnWriteArrayList<String>();
   
   /**
    * Flag to indicate if the storage of resource which updates a key needs 
@@ -1356,7 +1357,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     return sb.toString();
   }

-  private void toString(ArrayList resources, StringBuffer sb) {
+  private void toString(List resources, StringBuffer sb) {
     ListIterator i = resources.listIterator();
     while (i.hasNext()) {
       if (i.nextIndex() != 0) {

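The Configuration change above (HADOOP-6269) swaps the shared defaultResources list from ArrayList to CopyOnWriteArrayList: one thread may call addDefaultResource() while another iterates the list during resource loading, and ArrayList iteration can then fail with ConcurrentModificationException, whereas CopyOnWriteArrayList iterators read a snapshot. A small illustrative sketch of the difference (not Hadoop code; the resource names are made up):

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

public class SnapshotIterationDemo {
  public static void main(String[] args) throws InterruptedException {
    // With ArrayList this interleaving could throw ConcurrentModificationException;
    // the copy-on-write list lets the loop finish over a consistent snapshot.
    final List<String> defaultResources = new CopyOnWriteArrayList<String>();
    defaultResources.add("core-default.xml");
    defaultResources.add("core-site.xml");

    Thread adder = new Thread(new Runnable() {
      public void run() {
        defaultResources.add("extra-default.xml"); // stands in for a concurrent addDefaultResource()
      }
    });
    adder.start();

    for (String resource : defaultResources) {
      System.out.println("loading " + resource);
    }
    adder.join();
  }
}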
+ 882 - 0
src/core/org/apache/hadoop/fs/HarFileSystem.java

@@ -0,0 +1,882 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.util.Progressable;
+
+/**
+ * This is an implementation of the Hadoop Archive 
+ * Filesystem. This archive Filesystem has index files
+ * of the form _index* and has contents of the form
+ * part-*. The index files store the indexes of the 
+ * real files. The index files are of the form _masterindex
+ * and _index. The master index is a level of indirection 
+ * in to the index file to make the look ups faster. the index
+ * file is sorted with hash code of the paths that it contains 
+ * and the master index contains pointers to the positions in 
+ * index for ranges of hashcodes.
+ */
+
+public class HarFileSystem extends FilterFileSystem {
+  public static final int VERSION = 1;
+  // uri representation of this Har filesystem
+  private URI uri;
+  // the version of this har filesystem
+  private int version;
+  // underlying uri 
+  private URI underLyingURI;
+  // the top level path of the archive
+  // in the underlying file system
+  private Path archivePath;
+  // the masterIndex of the archive
+  private Path masterIndex;
+  // the index file 
+  private Path archiveIndex;
+  // the har auth
+  private String harAuth;
+  
+  /**
+   * public construction of harfilesystem
+   *
+   */
+  public HarFileSystem() {
+  }
+  
+  /**
+   * Constructor to create a HarFileSystem with an
+   * underlying filesystem.
+   * @param fs
+   */
+  public HarFileSystem(FileSystem fs) {
+    super(fs);
+  }
+  
+  /**
+   * Initialize a Har filesystem per har archive. The 
+   * archive home directory is the top level directory
+   * in the filesystem that contains the HAR archive.
+   * Be careful with this method, you do not want to go 
+   * on creating new Filesystem instances per call to 
+   * path.getFileSystem().
+   * the uri of Har is 
+   * har://underlyingfsscheme-host:port/archivepath.
+   * or 
+   * har:///archivepath. This assumes the underlying filesystem
+   * to be used in case not specified.
+   */
+  public void initialize(URI name, Configuration conf) throws IOException {
+    //decode the name
+    underLyingURI = decodeHarURI(name, conf);
+    //  we got the right har Path- now check if this is 
+    //truly a har filesystem
+    Path harPath = archivePath(new Path(name.toString()));
+    if (harPath == null) { 
+      throw new IOException("Invalid path for the Har Filesystem. " + 
+                           name.toString());
+    }
+    if (fs == null) {
+      fs = FileSystem.get(underLyingURI, conf);
+    }
+    this.uri = harPath.toUri();
+    this.archivePath = new Path(this.uri.getPath());
+    this.harAuth = getHarAuth(this.underLyingURI);
+    //check for the underlying fs containing
+    // the index file
+    this.masterIndex = new Path(archivePath, "_masterindex");
+    this.archiveIndex = new Path(archivePath, "_index");
+    if (!fs.exists(masterIndex) || !fs.exists(archiveIndex)) {
+      throw new IOException("Invalid path for the Har Filesystem. " +
+          "No index file in " + harPath);
+    }
+    try{ 
+      this.version = getHarVersion();
+    } catch(IOException io) {
+      throw new IOException("Unable to " +
+          "read the version of the Har file system: " + this.archivePath);
+    }
+    if (this.version != HarFileSystem.VERSION) {
+      throw new IOException("Invalid version " + 
+          this.version + " expected " + HarFileSystem.VERSION);
+    }
+  }
+  
+  // get the version of the filesystem from the masterindex file
+  // the version is currently not useful since its the first version 
+  // of archives
+  public int getHarVersion() throws IOException { 
+    FSDataInputStream masterIn = fs.open(masterIndex);
+    LineReader lmaster = new LineReader(masterIn, getConf());
+    Text line = new Text();
+    lmaster.readLine(line);
+    try {
+      masterIn.close();
+    } catch(IOException e){
+      //disregard it.
+      // its a read.
+    }
+    String versionLine = line.toString();
+    String[] arr = versionLine.split(" ");
+    int version = Integer.parseInt(arr[0]);
+    return version;
+  }
+  
+  /*
+   * find the parent path that is the 
+   * archive path in the path. The last
+   * path segment that ends with .har is 
+   * the path that will be returned.
+   */
+  private Path archivePath(Path p) {
+    Path retPath = null;
+    Path tmp = p;
+    for (int i=0; i< p.depth(); i++) {
+      if (tmp.toString().endsWith(".har")) {
+        retPath = tmp;
+        break;
+      }
+      tmp = tmp.getParent();
+    }
+    return retPath;
+  }
+
+  /**
+   * decode the raw URI to get the underlying URI
+   * @param rawURI raw Har URI
+   * @return filtered URI of the underlying fileSystem
+   */
+  private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
+    String tmpAuth = rawURI.getAuthority();
+    //we are using the default file
+    //system in the config 
+    //so create a underlying uri and 
+    //return it
+    if (tmpAuth == null) {
+      //create a path 
+      return FileSystem.getDefaultUri(conf);
+    }
+    String host = rawURI.getHost();
+    String[] str = host.split("-", 2);
+    if (str[0] == null) {
+      throw new IOException("URI: " + rawURI + " is an invalid Har URI.");
+    }
+    String underLyingScheme = str[0];
+    String underLyingHost = (str.length > 1)? str[1]:null;
+    int underLyingPort = rawURI.getPort();
+    String auth = (underLyingHost == null && underLyingPort == -1)?
+                  null:(underLyingHost+":"+underLyingPort);
+    URI tmp = null;
+    if (rawURI.getQuery() != null) {
+      // query component not allowed
+      throw new IOException("query component in Path not supported  " + rawURI);
+    }
+    try {
+      tmp = new URI(underLyingScheme, auth, rawURI.getPath(), 
+            rawURI.getQuery(), rawURI.getFragment());
+    } catch (URISyntaxException e) {
+        // do nothing should not happen
+    }
+    return tmp;
+  }
+  
+  /**
+   * return the top level archive.
+   */
+  public Path getWorkingDirectory() {
+    return new Path(uri.toString());
+  }
+  
+  /**
+   * Create a har specific auth 
+   * har-underlyingfs:port
+   * @param underLyingURI the uri of underlying
+   * filesystem
+   * @return har specific auth
+   */
+  private String getHarAuth(URI underLyingUri) {
+    String auth = underLyingUri.getScheme() + "-";
+    if (underLyingUri.getHost() != null) {
+      auth += underLyingUri.getHost() + ":";
+      if (underLyingUri.getPort() != -1) {
+        auth +=  underLyingUri.getPort();
+      }
+    }
+    else {
+      auth += ":";
+    }
+    return auth;
+  }
+  
+  /**
+   * Returns the uri of this filesystem.
+   * The uri is of the form 
+   * har://underlyingfsschema-host:port/pathintheunderlyingfs
+   */
+  @Override
+  public URI getUri() {
+    return this.uri;
+  }
+  
+  /**
+   * this method returns the path 
+   * inside the har filesystem.
+   * this is relative path inside 
+   * the har filesystem.
+   * @param path the fully qualified path in the har filesystem.
+   * @return relative path in the filesystem.
+   */
+  private Path getPathInHar(Path path) {
+    Path harPath = new Path(path.toUri().getPath());
+    if (archivePath.compareTo(harPath) == 0)
+      return new Path(Path.SEPARATOR);
+    Path tmp = new Path(harPath.getName());
+    Path parent = harPath.getParent();
+    while (!(parent.compareTo(archivePath) == 0)) {
+      if (parent.toString().equals(Path.SEPARATOR)) {
+        tmp = null;
+        break;
+      }
+      tmp = new Path(parent.getName(), tmp);
+      parent = parent.getParent();
+    }
+    if (tmp != null) 
+      tmp = new Path(Path.SEPARATOR, tmp);
+    return tmp;
+  }
+  
+  //the relative path of p. basically 
+  // getting rid of /. Parsing and doing 
+  // string manipulation is not good - so
+  // just use the path api to do it.
+  private Path makeRelative(String initial, Path p) {
+    Path root = new Path(Path.SEPARATOR);
+    if (root.compareTo(p) == 0)
+      return new Path(initial);
+    Path retPath = new Path(p.getName());
+    Path parent = p.getParent();
+    for (int i=0; i < p.depth()-1; i++) {
+      retPath = new Path(parent.getName(), retPath);
+      parent = parent.getParent();
+    }
+    return new Path(initial, retPath.toString());
+  }
+  
+  /* this makes a path qualified in the har filesystem
+   * (non-Javadoc)
+   * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
+   * org.apache.hadoop.fs.Path)
+   */
+  @Override
+  public Path makeQualified(Path path) {
+    // make sure that we just get the 
+    // path component 
+    Path fsPath = path;
+    if (!path.isAbsolute()) {
+      fsPath = new Path(archivePath, path);
+    }
+
+    URI tmpURI = fsPath.toUri();
+    //change this to Har uri 
+    return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
+  }
+  
+  /**
+   * get block locations from the underlying fs
+   * @param file the input filestatus to get block locations
+   * @param start the start in the file
+   * @param len the length in the file
+   * @return block locations for this segment of file
+   * @throws IOException
+   */
+  @Override
+  public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
+      long len) throws IOException {
+    // need to look up the file in the underlying fs
+    // look up the index 
+    
+    // make sure this is a prt of this har filesystem
+    Path p = makeQualified(file.getPath());
+    Path harPath = getPathInHar(p);
+    String line = fileStatusInIndex(harPath);
+    if (line == null)  {
+      throw new FileNotFoundException("File " + file.getPath() + " not found");
+    }
+    HarStatus harStatus = new HarStatus(line);
+    if (harStatus.isDir()) {
+      return new BlockLocation[0];
+    }
+    FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
+        harStatus.getPartName()));
+    BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile, 
+        harStatus.getStartIndex() + start, len);
+    return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
+  }
+  
+  /**
+   * fake the rawblocks since map reduce uses the block offsets to 
+   * fo some computations regarding the blocks
+   * @param rawBlocks the raw blocks returned by the filesystem
+   * @return faked blocks with changed offsets.
+   */
+  private BlockLocation[] fakeBlockLocations(BlockLocation[] rawBlocks, 
+		  long startIndex) {
+	for (BlockLocation block : rawBlocks) {
+		long rawOffset = block.getOffset();
+		block.setOffset(rawOffset - startIndex);
+	}
+	return rawBlocks;
+  }
+  
+  /**
+   * the hash of the path p inside iniside
+   * the filesystem
+   * @param p the path in the harfilesystem
+   * @return the hash code of the path.
+   */
+  public static int getHarHash(Path p) {
+    return (p.toString().hashCode() & 0x7fffffff);
+  }
+  
+  static class Store {
+    public Store() {
+      begin = end = startHash = endHash = 0;
+    }
+    public Store(long begin, long end, int startHash, int endHash) {
+      this.begin = begin;
+      this.end = end;
+      this.startHash = startHash;
+      this.endHash = endHash;
+    }
+    public long begin;
+    public long end;
+    public int startHash;
+    public int endHash;
+  }
+  
+  // make sure that this harPath is relative to the har filesystem
+  // this only works for relative paths. This returns the line matching
+  // the file in the index. Returns a null if there is not matching 
+  // filename in the index file.
+  private String fileStatusInIndex(Path harPath) throws IOException {
+    // read the index file 
+    int hashCode = getHarHash(harPath);
+    // get the master index to find the pos 
+    // in the index file
+    FSDataInputStream in = fs.open(masterIndex);
+    FileStatus masterStat = fs.getFileStatus(masterIndex);
+    LineReader lin = new LineReader(in, getConf());
+    Text line = new Text();
+    long read = lin.readLine(line);
+   //ignore the first line. this is the header of the index files
+    String[] readStr = null;
+    List<Store> stores = new ArrayList<Store>();
+    while(read < masterStat.getLen()) {
+      int b = lin.readLine(line);
+      read += b;
+      readStr = line.toString().split(" ");
+      int startHash = Integer.parseInt(readStr[0]);
+      int endHash  = Integer.parseInt(readStr[1]);
+      if (startHash <= hashCode && hashCode <= endHash) {
+        stores.add(new Store(Long.parseLong(readStr[2]), 
+            Long.parseLong(readStr[3]), startHash,
+            endHash));
+      }
+      line.clear();
+    }
+    try {
+      lin.close();
+    } catch(IOException io){
+      // do nothing just a read.
+    }
+    FSDataInputStream aIn = fs.open(archiveIndex);
+    LineReader aLin;
+    String retStr = null;
+    // now start reading the real index file
+    for (Store s: stores) {
+      read = 0;
+      aIn.seek(s.begin);
+      aLin = new LineReader(aIn, getConf());
+      while (read + s.begin < s.end) {
+        int tmp = aLin.readLine(line);
+        read += tmp;
+        String lineFeed = line.toString();
+        String[] parsed = lineFeed.split(" ");
+        if (harPath.compareTo(new Path(parsed[0])) == 0) {
+          // bingo!
+          retStr = lineFeed;
+          break;
+        }
+        line.clear();
+      }
+      if (retStr != null)
+        break;
+    }
+    try {
+      aIn.close();
+    } catch(IOException io) {
+      //do nothing
+    }
+    return retStr;
+  }
+  
+  // a single line parser for hadoop archives status 
+  // stored in a single line in the index files 
+  // the format is of the form 
+  // filename "dir"/"file" partFileName startIndex length 
+  // <space seperated children>
+  private static class HarStatus {
+    boolean isDir;
+    String name;
+    List<String> children;
+    String partName;
+    long startIndex;
+    long length;
+    public HarStatus(String harString) {
+      String[] splits = harString.split(" ");
+      this.name = splits[0];
+      this.isDir = "dir".equals(splits[1]) ? true: false;
+      // this is equal to "none" if its a directory
+      this.partName = splits[2];
+      this.startIndex = Long.parseLong(splits[3]);
+      this.length = Long.parseLong(splits[4]);
+      if (isDir) {
+        children = new ArrayList<String>();
+        for (int i = 5; i < splits.length; i++) {
+          children.add(splits[i]);
+        }
+      }
+    }
+    public boolean isDir() {
+      return isDir;
+    }
+    
+    public String getName() {
+      return name;
+    }
+    
+    public List<String> getChildren() {
+      return children;
+    }
+    public String getFileName() {
+      return name;
+    }
+    public String getPartName() {
+      return partName;
+    }
+    public long getStartIndex() {
+      return startIndex;
+    }
+    public long getLength() {
+      return length;
+    }
+  }
+  
+  /**
+   * return the filestatus of files in har archive.
+   * The permission returned are that of the archive
+   * index files. The permissions are not persisted 
+   * while creating a hadoop archive.
+   * @param f the path in har filesystem
+   * @return filestatus.
+   * @throws IOException
+   */
+  @Override
+  public FileStatus getFileStatus(Path f) throws IOException {
+    FileStatus archiveStatus = fs.getFileStatus(archiveIndex);
+    // get the fs DataInputStream for the underlying file
+    // look up the index.
+    Path p = makeQualified(f);
+    Path harPath = getPathInHar(p);
+    if (harPath == null) {
+      throw new IOException("Invalid file name: " + f + " in " + uri);
+    }
+    String readStr = fileStatusInIndex(harPath);
+    if (readStr == null) {
+      throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
+    }
+    HarStatus hstatus = null;
+    hstatus = new HarStatus(readStr);
+    return new FileStatus(hstatus.isDir()?0:hstatus.getLength(), hstatus.isDir(),
+        (int)archiveStatus.getReplication(), archiveStatus.getBlockSize(),
+        archiveStatus.getModificationTime(), archiveStatus.getAccessTime(),
+        new FsPermission(
+        archiveStatus.getPermission()), archiveStatus.getOwner(), 
+        archiveStatus.getGroup(), 
+            makeRelative(this.uri.toString(), new Path(hstatus.name)));
+  }
+
+  /**
+   * Returns a har input stream which fakes end of 
+   * file. It reads the index files to get the part 
+   * file name and the size and start of the file.
+   */
+  @Override
+  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+    // get the fs DataInputStream for the underlying file
+    // look up the index.
+    Path p = makeQualified(f);
+    Path harPath = getPathInHar(p);
+    if (harPath == null) {
+      throw new IOException("Invalid file name: " + f + " in " + uri);
+    }
+    String readStr = fileStatusInIndex(harPath);
+    if (readStr == null) {
+      throw new FileNotFoundException(f + ": not found in " + archivePath);
+    }
+    HarStatus hstatus = new HarStatus(readStr); 
+    // we got it.. woo hooo!!! 
+    if (hstatus.isDir()) {
+      throw new FileNotFoundException(f + " : not a file in " +
+                archivePath);
+    }
+    return new HarFSDataInputStream(fs, new Path(archivePath, 
+        hstatus.getPartName()),
+        hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
+  }
+ 
+  /*
+   * create throws an exception in Har filesystem.
+   * The archive once created cannot be changed.
+   */
+  public FSDataOutputStream create(Path f, int bufferSize) 
+                                    throws IOException {
+    throw new IOException("Har: Create not allowed");
+  }
+  
+  public FSDataOutputStream create(Path f,
+      FsPermission permission,
+      boolean overwrite,
+      int bufferSize,
+      short replication,
+      long blockSize,
+      Progressable progress) throws IOException {
+    throw new IOException("Har: create not allowed.");
+  }
+  
+  @Override
+  public void close() throws IOException {
+    if (fs != null) {
+      try {
+        fs.close();
+      } catch(IOException ie) {
+        //this might already be closed
+        // ignore
+      }
+    }
+  }
+  
+  /**
+   * Not implemented.
+   */
+  @Override
+  public boolean setReplication(Path src, short replication) throws IOException{
+    throw new IOException("Har: setreplication not allowed");
+  }
+  
+  /**
+   * Not implemented.
+   */
+  @Override
+  public boolean delete(Path f, boolean recursive) throws IOException { 
+    throw new IOException("Har: delete not allowed");
+  }
+  
+  /**
+   * liststatus returns the children of a directory 
+   * after looking up the index files.
+   */
+  @Override
+  public FileStatus[] listStatus(Path f) throws IOException {
+    //need to see if the file is an index in file
+    //get the filestatus of the archive directory
+    // we will create fake filestatuses to return
+    // to the client
+    List<FileStatus> statuses = new ArrayList<FileStatus>();
+    FileStatus archiveStatus = fs.getFileStatus(archiveIndex);
+    Path tmpPath = makeQualified(f);
+    Path harPath = getPathInHar(tmpPath);
+    String readStr = fileStatusInIndex(harPath);
+    if (readStr == null) {
+      throw new FileNotFoundException("File " + f + " not found in " + archivePath);
+    }
+    HarStatus hstatus = new HarStatus(readStr);
+    if (!hstatus.isDir()) 
+        statuses.add(new FileStatus(hstatus.getLength(), 
+            hstatus.isDir(),
+            archiveStatus.getReplication(), archiveStatus.getBlockSize(),
+            archiveStatus.getModificationTime(), archiveStatus.getAccessTime(),
+            new FsPermission(archiveStatus.getPermission()),
+            archiveStatus.getOwner(), archiveStatus.getGroup(), 
+            makeRelative(this.uri.toString(), new Path(hstatus.name))));
+    else 
+      for (String child: hstatus.children) {
+        FileStatus tmp = getFileStatus(new Path(tmpPath, child));
+        statuses.add(tmp);
+      }
+    return statuses.toArray(new FileStatus[statuses.size()]);
+  }
+  
+  /**
+   * return the top level archive path.
+   */
+  public Path getHomeDirectory() {
+    return new Path(uri.toString());
+  }
+  
+  public void setWorkingDirectory(Path newDir) {
+    //does nothing.
+  }
+  
+  /**
+   * not implemented.
+   */
+  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
+    throw new IOException("Har: mkdirs not allowed");
+  }
+  
+  /**
+   * not implemented.
+   */
+  public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws 
+        IOException {
+    throw new IOException("Har: copyfromlocalfile not allowed");
+  }
+  
+  /**
+   * copies the file in the har filesystem to a local file.
+   */
+  public void copyToLocalFile(boolean delSrc, Path src, Path dst) 
+    throws IOException {
+    FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
+  }
+  
+  /**
+   * not implemented.
+   */
+  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
+    throws IOException {
+    throw new IOException("Har: startLocalOutput not allowed");
+  }
+  
+  /**
+   * not implemented.
+   */
+  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
+    throws IOException {
+    throw new IOException("Har: completeLocalOutput not allowed");
+  }
+  
+  /**
+   * not implemented.
+   */
+  public void setOwner(Path p, String username, String groupname)
+    throws IOException {
+    throw new IOException("Har: setowner not allowed");
+  }
+
+  /**
+   * Not implemented.
+   */
+  public void setPermission(Path p, FsPermission permisssion) 
+    throws IOException {
+    throw new IOException("Har: setPermission not allowed");
+  }
+  
+  /**
+   * Hadoop archives input stream. This input stream fakes EOF 
+   * since archive files are part of bigger part files.
+   */
+  private static class HarFSDataInputStream extends FSDataInputStream {
+    /**
+     * Create an input stream that fakes all the reads/positions/seeking.
+     */
+    private static class HarFsInputStream extends FSInputStream {
+      private long position, start, end;
+      //The underlying data input stream that the
+      // underlying filesystem will return.
+      private FSDataInputStream underLyingStream;
+      //one byte buffer
+      private byte[] oneBytebuff = new byte[1];
+      HarFsInputStream(FileSystem fs, Path path, long start,
+          long length, int bufferSize) throws IOException {
+        underLyingStream = fs.open(path, bufferSize);
+        underLyingStream.seek(start);
+        // the start of this file in the part file
+        this.start = start;
+        // the position pointer in the part file
+        this.position = start;
+        // the end pointer in the part file
+        this.end = start + length;
+      }
+      
+      public synchronized int available() throws IOException {
+        long remaining = end - underLyingStream.getPos();
+        if (remaining > (long)Integer.MAX_VALUE) {
+          return Integer.MAX_VALUE;
+        }
+        return (int) remaining;
+      }
+      
+      public synchronized  void close() throws IOException {
+        underLyingStream.close();
+        super.close();
+      }
+      
+      //not implemented
+      @Override
+      public void mark(int readLimit) {
+        // do nothing 
+      }
+      
+      /**
+       * reset is not implemented
+       */
+      public void reset() throws IOException {
+        throw new IOException("reset not implemented.");
+      }
+      
+      public synchronized int read() throws IOException {
+        int ret = read(oneBytebuff, 0, 1);
+        return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
+      }
+      
+      public synchronized int read(byte[] b) throws IOException {
+        int ret = read(b, 0, b.length);
+        if (ret != -1) {
+          position += ret;
+        }
+        return ret;
+      }
+      
+      /**
+       * 
+       */
+      public synchronized int read(byte[] b, int offset, int len) 
+        throws IOException {
+        int newlen = len;
+        int ret = -1;
+        if (position + len > end) {
+          newlen = (int) (end - position);
+        }
+        // end case
+        if (newlen == 0) 
+          return ret;
+        ret = underLyingStream.read(b, offset, newlen);
+        position += ret;
+        return ret;
+      }
+      
+      public synchronized long skip(long n) throws IOException {
+        long tmpN = n;
+        if (tmpN > 0) {
+          if (position + tmpN > end) {
+            tmpN = end - position;
+          }
+          underLyingStream.seek(tmpN + position);
+          position += tmpN;
+          return tmpN;
+        }
+        return (tmpN < 0)? -1 : 0;
+      }
+      
+      public synchronized long getPos() throws IOException {
+        return (position - start);
+      }
+      
+      public synchronized void seek(long pos) throws IOException {
+        if (pos < 0 || (start + pos > end)) {
+          throw new IOException("Failed to seek: EOF");
+        }
+        position = start + pos;
+        underLyingStream.seek(position);
+      }
+
+      public boolean seekToNewSource(long targetPos) throws IOException {
+        //do not need to implement this
+        // hdfs in itself does seektonewsource 
+        // while reading.
+        return false;
+      }
+      
+      /**
+       * implementing position readable. 
+       */
+      public int read(long pos, byte[] b, int offset, int length) 
+      throws IOException {
+        int nlength = length;
+        if (start + nlength + pos > end) {
+          nlength = (int) (end - (start + pos));
+        }
+        return underLyingStream.read(pos + start , b, offset, nlength);
+      }
+      
+      /**
+       * position readable again.
+       */
+      public void readFully(long pos, byte[] b, int offset, int length) 
+      throws IOException {
+        if (start + length + pos > end) {
+          throw new IOException("Not enough bytes to read.");
+        }
+        underLyingStream.readFully(pos + start, b, offset, length);
+      }
+      
+      public void readFully(long pos, byte[] b) throws IOException {
+          readFully(pos, b, 0, b.length);
+      }
+      
+    }
+  
+    /**
+     * Constructor for a HAR input stream.
+     * @param fs the underlying filesystem
+     * @param p The path in the underlying filesystem
+     * @param start the start position in the part file
+     * @param length the length of valid data in the part file
+     * @param bufsize the buffer size
+     * @throws IOException
+     */
+    public HarFSDataInputStream(FileSystem fs, Path  p, long start, 
+        long length, int bufsize) throws IOException {
+        super(new HarFsInputStream(fs, p, start, length, bufsize));
+    }
+
+    /**
+     * constructor for har input stream.
+     * @param fs the underlying filesystem
+     * @param p the path in the underlying file system
+     * @param start the start position in the part file
+     * @param length the length of valid data in the part file.
+     * @throws IOException
+     */
+    public HarFSDataInputStream(FileSystem fs, Path  p, long start, long length)
+      throws IOException {
+        super(new HarFsInputStream(fs, p, start, length, 0));
+    }
+  }
+}
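The HarFsInputStream above only ever exposes the slice of the part file that belongs to one archived file, translating archive-relative offsets by adding start. The following is a minimal usage sketch, not part of the commit, assuming a hypothetical archive at har:///user/alice/logs.har and a configuration whose fs.har.impl maps the har: scheme to HarFileSystem:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HarReadSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Hypothetical file inside a hypothetical archive; any path created by the
    // 'hadoop archive' tool would do, provided it holds at least 64 bytes.
    Path file = new Path("har:///user/alice/logs.har/2011/05/01/part-00000");
    FileSystem harFs = file.getFileSystem(conf);   // resolves to HarFileSystem
    FSDataInputStream in = harFs.open(file);       // wraps a HarFSDataInputStream
    byte[] buf = new byte[64];
    in.readFully(0, buf);                          // positioned read: offset 0 is the start
                                                   // of this file, not of the part file
    in.close();
    System.out.println(new String(buf, "UTF-8"));
  }
}

The readFully call lands in HarFsInputStream.readFully(long, byte[], int, int) above, which rejects any read that would cross the end of the file's region in the part file.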

+ 8 - 0
src/core/org/apache/hadoop/io/compress/CodecPool.java

@@ -141,6 +141,10 @@ public class CodecPool {
     if (compressor == null) {
     if (compressor == null) {
       return;
       return;
     }
     }
+    // if the compressor can't be reused, don't pool it.
+    if (compressor.getClass().isAnnotationPresent(DoNotPool.class)) {
+      return;
+    }
     compressor.reset();
     compressor.reset();
     payback(compressorPool, compressor);
     payback(compressorPool, compressor);
   }
   }
@@ -155,6 +159,10 @@ public class CodecPool {
     if (decompressor == null) {
     if (decompressor == null) {
       return;
       return;
     }
     }
+    // if the decompressor can't be reused, don't pool it.
+    if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
+      return;
+    }
     decompressor.reset();
     decompressor.reset();
     payback(decompressorPool, decompressor);
     payback(decompressorPool, decompressor);
   }
   }
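Seen from the caller's side, the change above makes returning a non-reusable (de)compressor a silent no-op, so the next get from the pool constructs a fresh instance. A small sketch, assuming GzipCodec hands out the pure-Java BuiltInGzipDecompressor (annotated in a later hunk of this commit) when the native zlib library is not loaded:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.ReflectionUtils;

public class CodecPoolSketch {
  public static void main(String[] args) {
    GzipCodec codec = ReflectionUtils.newInstance(GzipCodec.class, new Configuration());
    Decompressor d = CodecPool.getDecompressor(codec);
    // ... pass d to codec.createInputStream(in, d) and read the compressed stream ...
    CodecPool.returnDecompressor(d);
    // If d is a BuiltInGzipDecompressor, the isAnnotationPresent(DoNotPool.class)
    // check added above discards it here instead of resetting and caching it.
  }
}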

+ 35 - 0
src/core/org/apache/hadoop/io/compress/DoNotPool.java

@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.compress;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * This is a marker annotation that marks a compressor or decompressor 
+ * type as not to be pooled.
+ */
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+@Documented
+public @interface DoNotPool {
+
+}

+ 2 - 0
src/core/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java

@@ -24,12 +24,14 @@ import java.util.zip.Inflater;
 import java.util.zip.CRC32;
 import java.util.zip.CRC32;
 
 
 import org.apache.hadoop.io.compress.Decompressor;
 import org.apache.hadoop.io.compress.Decompressor;
+import org.apache.hadoop.io.compress.DoNotPool;
 
 
 /**
 /**
  * A {@link Decompressor} based on the popular gzip compressed file format.
  * A {@link Decompressor} based on the popular gzip compressed file format.
  * http://www.gzip.org/
  * http://www.gzip.org/
  *
  *
  */
  */
+@DoNotPool
 public class BuiltInGzipDecompressor implements Decompressor {
 public class BuiltInGzipDecompressor implements Decompressor {
   private static final int GZIP_MAGIC_ID = 0x8b1f;  // if read as LE short int
   private static final int GZIP_MAGIC_ID = 0x8b1f;  // if read as LE short int
   private static final int GZIP_DEFLATE_METHOD = 8;
   private static final int GZIP_DEFLATE_METHOD = 8;

+ 2 - 2
src/core/overview.html

@@ -114,7 +114,7 @@ be demonstrated as follows:</p>
 <tt>
 <tt>
 mkdir input<br>
 mkdir input<br>
 cp conf/*.xml input<br>
 cp conf/*.xml input<br>
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'<br>
+bin/hadoop jar hadoop-examples-*.jar grep input output 'dfs[a-z.]+'<br>
 cat output/*
 cat output/*
 </tt>
 </tt>
 <p>This will display counts for each match of the <a
 <p>This will display counts for each match of the <a
@@ -217,7 +217,7 @@ command, run on the master node:</p>
 examine it:</p>
 examine it:</p>
 
 
 <tt>
 <tt>
-bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'<br>
+bin/hadoop jar hadoop-examples-*.jar grep input output 'dfs[a-z.]+'<br>
 bin/hadoop fs -get output output
 bin/hadoop fs -get output output
 cat output/*
 cat output/*
 </tt>
 </tt>

+ 456 - 198
src/docs/src/documentation/content/xdocs/capacity_scheduler.xml

@@ -20,7 +20,7 @@
 <document>
 <document>
   
   
   <header>
   <header>
-    <title>Capacity Scheduler Guide</title>
+    <title>CapacityScheduler Guide</title>
   </header>
   </header>
   
   
   <body>
   <body>
@@ -28,93 +28,127 @@
     <section>
     <section>
       <title>Purpose</title>
       <title>Purpose</title>
       
       
-      <p>This document describes the Capacity Scheduler, a pluggable 
-      MapReduce scheduler for Hadoop which provides a way to share 
-      large clusters.</p>
+      <p>This document describes the CapacityScheduler, a pluggable 
+      MapReduce scheduler for Hadoop which allows multiple tenants to 
+      securely share a large cluster such that their applications are allocated
+      resources in a timely manner under constraints of allocated capacities.
+      </p>
+    </section>
+    
+    <section>
+      <title>Overview</title>
+     
+      <p>The CapacityScheduler is designed to run Hadoop Map-Reduce as a 
+      shared, multi-tenant cluster in an operator-friendly manner while 
+      maximizing the throughput and utilization of the cluster when
+      running Map-Reduce applications.</p>
+     
+      <p>Traditionally each organization has its own private set of compute 
+      resources that have sufficient capacity to meet the organization's SLA 
+      under peak or near peak conditions. This generally leads to poor average 
+      utilization and the overhead of managing multiple independent clusters, 
+      one per organization. Sharing clusters between organizations is a 
+      cost-effective manner of running large Hadoop installations since this 
+      allows them to reap benefits of economies of scale without creating 
+      private clusters.  However, organizations are concerned about sharing a 
+      cluster because they are worried about others using the resources that 
+      are critical for their SLAs.</p> 
+
+      <p>The CapacityScheduler is designed to allow sharing a large cluster 
+      while giving  each organization a minimum capacity guarantee. The central 
+      idea is that the available resources in the Hadoop Map-Reduce cluster are 
+      partitioned among multiple organizations who collectively fund the 
+      cluster based on computing needs. There is an added benefit that an 
+      organization can access any excess capacity not being used by others. This 
+      provides elasticity for the organizations in a cost-effective manner.</p> 
+
+      <p>Sharing clusters across organizations necessitates strong support for
+      multi-tenancy since each organization must be guaranteed capacity and 
+      safeguards to ensure the shared cluster is impervious to a single rogue 
+      job or user. The CapacityScheduler provides a stringent set of limits to 
+      ensure that a single job, user, or queue cannot consume a disproportionate 
+      amount of resources in the cluster. Also, the JobTracker of the cluster,  
+      in particular, is a precious resource and the CapacityScheduler provides 
+      limits on initialized/pending tasks and jobs from a single user and queue 
+      to ensure fairness and stability of the cluster.</p> 
+
+      <p>The primary abstraction provided by the CapacityScheduler is the 
+      concept of <em>queues</em>. These queues are typically set up by administrators
+      to reflect the economics of the shared cluster.</p>
     </section>
     </section>
     
     
     <section>
     <section>
       <title>Features</title>
       <title>Features</title>
       
       
-      <p>The Capacity Scheduler supports the following features:</p> 
+      <p>The CapacityScheduler supports the following features:</p> 
       <ul>
       <ul>
         <li>
         <li>
-          Support for multiple queues, where a job is submitted to a queue.
+          Capacity Guarantees - Support for multiple queues, where a job is 
+          submitted to a queue. Queues are allocated a fraction of the capacity 
+          of the grid in the sense that a certain capacity of resources will be 
+          at their disposal. All jobs submitted to a queue will have access to 
+          the capacity allocated to the queue. Administrators can configure soft 
+          limits and optional hard limits on the capacity allocated to each queue. 
         </li>
         </li>
         <li>
         <li>
-          Queues are allocated a fraction of the capacity of the grid in the 
-          sense that a certain capacity of resources will be at their 
-          disposal. All jobs submitted to a queue will have access to the 
-          capacity allocated to the queue.
+          Security - Each queue has strict ACLs which control which users can 
+          submit jobs to individual queues. There are also safeguards to 
+          ensure that users cannot view and/or modify jobs from other users if
+          so desired. Per-queue and system administrator roles are 
+          supported.
         </li>
         </li>
         <li>
         <li>
-          Free resources can be allocated to any queue beyond it's capacity. 
-          When there is demand for these resources from queues running below 
-          capacity at a future point in time, as tasks scheduled on these 
+          Elasticity - Free resources can be allocated to any queue beyond its 
+          capacity. When there is demand for these resources from queues running 
+          below capacity at a future point in time, as tasks scheduled on these 
           resources complete, they will be assigned to jobs on queues 
           resources complete, they will be assigned to jobs on queues 
-          running below the capacity.
+          running below the capacity. This ensures that resources are available 
+          in a predictable and elastic manner to queues, thus preventing 
+          artificial silos of resources in the cluster, which helps utilization.
         </li>
         </li>
         <li>
         <li>
-          Queues optionally support job priorities (disabled by default).
+          Multi-tenancy - A comprehensive set of limits is provided to prevent 
+          a single job, user, or queue from monopolizing resources of the queue 
+          or the cluster as a whole, ensuring that the system, particularly the 
+          JobTracker, isn't overwhelmed by too many tasks or jobs. 
         </li>
         </li>
         <li>
         <li>
-          Within a queue, jobs with higher priority will have access to the 
-          queue's resources before jobs with lower priority. However, once a 
-          job is running, it will not be preempted for a higher priority job,
-          though new tasks from the higher priority job will be 
-          preferentially scheduled.
+          Operability - The queue definitions and properties can be changed, 
+          at runtime, by administrators in a secure manner to minimize 
+          disruption to users. Also, a console is provided for users and 
+          administrators to view current allocation of resources to various 
+          queues in the system.
         </li>
         </li>
         <li>
         <li>
-          In order to prevent one or more users from monopolizing its 
-          resources, each queue enforces a limit on the percentage of 
-          resources allocated to a user at any given time, if there is 
-          competition for them.  
+          Resource-based Scheduling - Support for resource-intensive jobs, 
+          wherein a job can optionally specify higher resource requirements than 
+          the default, thereby accommodating applications with differing resource
+          requirements. Currently, memory is the only resource requirement 
+          supported.
         </li>
         </li>
         <li>
         <li>
-          Support for memory-intensive jobs, wherein a job can optionally 
-          specify higher memory-requirements than the default, and the tasks 
-          of the job will only be run on TaskTrackers that have enough memory 
-          to spare.
+          Job Priorities - Queues optionally support job priorities (disabled 
+          by default). Within a queue, jobs with higher priority will have 
+          access to the queue's resources before jobs with lower priority. 
+          However, once a job is running, it will not be preempted for a higher 
+          priority job; <em>preemption</em> is on the roadmap but is currently not 
+          supported.
         </li>
         </li>
       </ul>
       </ul>
     </section>
     </section>
     
     
-    <section>
-      <title>Picking a task to run</title>
-      
-      <p>Note that many of these steps can be, and will be, enhanced over time
-      to provide better algorithms.</p>
-      
-      <p>Whenever a TaskTracker is free, the Capacity Scheduler picks 
-      a queue which has most free space (whose ratio of # of running slots to 
-      capacity is the lowest).</p>
-      
-      <p>Once a queue is selected, the Scheduler picks a job in the queue. Jobs
-      are sorted based on when they're submitted and their priorities (if the 
-      queue supports priorities). Jobs are considered in order, and a job is 
-      selected if its user is within the user-quota for the queue, i.e., the 
-      user is not already using queue resources above his/her limit. The 
-      Scheduler also makes sure that there is enough free memory in the 
-      TaskTracker to tun the job's task, in case the job has special memory
-      requirements.</p>
-      
-      <p>Once a job is selected, the Scheduler picks a task to run. This logic 
-      to pick a task remains unchanged from earlier versions.</p> 
-      
-    </section>
-    
     <section>
     <section>
       <title>Installation</title>
       <title>Installation</title>
       
       
-        <p>The Capacity Scheduler is available as a JAR file in the Hadoop
+        <p>The CapacityScheduler is available as a JAR file in the Hadoop
         tarball under the <em>contrib/capacity-scheduler</em> directory. The name of 
         tarball under the <em>contrib/capacity-scheduler</em> directory. The name of 
-        the JAR file would be on the lines of hadoop-*-capacity-scheduler.jar.</p>
+        the JAR file would be along the lines of hadoop-capacity-scheduler-*.jar.</p>
         <p>You can also build the Scheduler from source by executing
         <p>You can also build the Scheduler from source by executing
         <em>ant package</em>, in which case it would be available under
         <em>ant package</em>, in which case it would be available under
         <em>build/contrib/capacity-scheduler</em>.</p>
         <em>build/contrib/capacity-scheduler</em>.</p>
-        <p>To run the Capacity Scheduler in your Hadoop installation, you need 
+        <p>To run the CapacityScheduler in your Hadoop installation, you need 
         to put it on the <em>CLASSPATH</em>. The easiest way is to copy the 
         to put it on the <em>CLASSPATH</em>. The easiest way is to copy the 
-        <code>hadoop-*-capacity-scheduler.jar</code> from 
+        <code>hadoop-capacity-scheduler-*.jar</code> from <em>HADOOP_HOME/contrib/capacity-scheduler</em> 
         to <code>HADOOP_HOME/lib</code>. Alternatively, you can modify 
         to <code>HADOOP_HOME/lib</code>. Alternatively, you can modify 
         <em>HADOOP_CLASSPATH</em> to include this jar, in 
         <em>HADOOP_CLASSPATH</em> to include this jar, in 
         <code>conf/hadoop-env.sh</code>.</p>
         <code>conf/hadoop-env.sh</code>.</p>
@@ -124,9 +158,9 @@
       <title>Configuration</title>
       <title>Configuration</title>
 
 
       <section>
       <section>
-        <title>Using the Capacity Scheduler</title>
+        <title>Using the CapacityScheduler</title>
         <p>
         <p>
-          To make the Hadoop framework use the Capacity Scheduler, set up
+          To make the Hadoop framework use the CapacityScheduler, set up
           the following property in the site configuration:</p>
           the following property in the site configuration:</p>
           <table>
           <table>
             <tr>
             <tr>
@@ -144,14 +178,22 @@
         <title>Setting up queues</title>
         <title>Setting up queues</title>
         <p>
         <p>
           You can define multiple queues to which users can submit jobs with
           You can define multiple queues to which users can submit jobs with
-          the Capacity Scheduler. To define multiple queues, you should edit
-          the site configuration for Hadoop and modify the
-          <em>mapred.queue.names</em> property.
+          the CapacityScheduler. To define multiple queues, you should use the  
+          <em>mapred.queue.names</em> property in 
+          <code>conf/hadoop-site.xml</code>.
         </p>
         </p>
+        
+        <p>
+          The CapacityScheduler can be configured with several properties
+          for each queue that control the behavior of the Scheduler. This
+          configuration is in the <em>conf/capacity-scheduler.xml</em>.
+        </p>
+        
         <p>
         <p>
           You can also configure ACLs for controlling which users or groups
           You can also configure ACLs for controlling which users or groups
-          have access to the queues.
+          have access to the queues in <code>conf/mapred-queue-acls.xml</code>.
         </p>
         </p>
+        
         <p>
         <p>
           For more details, refer to
           For more details, refer to
           <a href="cluster_setup.html#Configuring+the+Hadoop+Daemons">Cluster 
           <a href="cluster_setup.html#Configuring+the+Hadoop+Daemons">Cluster 
@@ -160,25 +202,12 @@
       </section>
       </section>
   
   
       <section>
       <section>
-        <title>Configuring properties for queues</title>
+        <title>Queue properties</title>
 
 
-        <p>The Capacity Scheduler can be configured with several properties
-        for each queue that control the behavior of the Scheduler. This
-        configuration is in the <em>conf/capacity-scheduler.xml</em>. By
-        default, the configuration is set up for one queue, named 
-        <em>default</em>.</p>
-        <p>To specify a property for a queue that is defined in the site
-        configuration, you should use the property name as
-        <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
-        </p>
-        <p>For example, to define the property <em>capacity</em>
-        for queue named <em>research</em>, you should specify the property
-        name as 
-        <em>mapred.capacity-scheduler.queue.research.capacity</em>.
-        </p>
-
-        <p>The properties defined for queues and their descriptions are
-        listed in the table below:</p>
+        <section>
+        <title>Resource allocation</title>
+        <p>The properties defined for resource allocations to queues and their 
+        descriptions are listed below:</p>
 
 
         <table>
         <table>
           <tr><th>Name</th><th>Description</th></tr>
           <tr><th>Name</th><th>Description</th></tr>
@@ -187,25 +216,8 @@
             to be available for jobs in this queue. The sum of capacities 
             to be available for jobs in this queue. The sum of capacities 
             for all queues should be less than or equal 100.</td>
             for all queues should be less than or equal 100.</td>
           </tr>
           </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.supports-priority</td>
-          	<td>If true, priorities of jobs will be taken into account in scheduling 
-          	decisions.</td>
-          </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.minimum-user-limit-percent</td>
-          	<td>Each queue enforces a limit on the percentage of resources 
-          	allocated to a user at any given time, if there is competition 
-          	for them. This user limit can vary between a minimum and maximum 
-          	value. The former depends on the number of users who have submitted
-          	jobs, and the latter is set to this property value. For example, 
-          	suppose the value of this property is 25. If two users have 
-          	submitted jobs to a queue, no single user can use more than 50% 
-          	of the queue resources. If a third user submits a job, no single 
-          	user can use more than 33% of the queue resources. With 4 or more 
-          	users, no user can use more than 25% of the queue's resources. A 
-          	value of 100 implies no user limits are imposed.</td>
-          </tr>
           <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.maximum-capacity</td>
           <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.maximum-capacity</td>
-          	<td>
+            <td>
                   maximum-capacity defines a limit beyond which a queue cannot
                   maximum-capacity defines a limit beyond which a queue cannot
                   use the capacity of the cluster.This provides a means to limit
                   use the capacity of the cluster.This provides a means to limit
                   how much excess capacity a queue can use. By default, there
                   how much excess capacity a queue can use. By default, there
@@ -228,137 +240,175 @@
                   absolute terms would increase accordingly.
                   absolute terms would increase accordingly.
                 </td>
                 </td>
           </tr>
           </tr>
-        </table>
-      </section>
-      
-      <section>
-        <title>Memory management</title>
-      
-        <p>The Capacity Scheduler supports scheduling of tasks on a
-        <code>TaskTracker</code>(TT) based on a job's memory requirements
-        and the availability of RAM and Virtual Memory (VMEM) on the TT node.
-        See the <a href="mapred_tutorial.html#Memory+monitoring"> 
-        MapReduce Tutorial</a> for details on how the TT monitors
-        memory usage.</p>
-        <p>Currently the memory based scheduling is only supported
-        in Linux platform.</p>
-        <p>Memory-based scheduling works as follows:</p>
-        <ol>
-          <li>The absence of any one or more of three config parameters 
-          or -1 being set as value of any of the parameters, 
-          <code>mapred.tasktracker.vmem.reserved</code>, 
-          <code>mapred.task.default.maxvmem</code>, or
-          <code>mapred.task.limit.maxvmem</code>, disables memory-based
-          scheduling, just as it disables memory monitoring for a TT. These
-          config parameters are described in the 
-          <a href="mapred_tutorial.html#Memory+monitoring">MapReduce 
-          Tutorial</a>. The value of  
-          <code>mapred.tasktracker.vmem.reserved</code> is 
-          obtained from the TT via its heartbeat. 
-          </li>
-          <li>If all the three mandatory parameters are set, the Scheduler 
-          enables VMEM-based scheduling. First, the Scheduler computes the free
-          VMEM on the TT. This is the difference between the available VMEM on the
-          TT (the node's total VMEM minus the offset, both of which are sent by 
-          the TT on each heartbeat)and the sum of VMs already allocated to 
-          running tasks (i.e., sum of the VMEM task-limits). Next, the Scheduler
-          looks at the VMEM requirements for the job that's first in line to 
-          run. If the job's VMEM requirements are less than the available VMEM on 
-          the node, the job's task can be scheduled. If not, the Scheduler 
-          ensures that the TT does not get a task to run (provided the job 
-          has tasks to run). This way, the Scheduler ensures that jobs with 
-          high memory requirements are not starved, as eventually, the TT 
-          will have enough VMEM available. If the high-mem job does not have 
-          any task to run, the Scheduler moves on to the next job. 
-          </li>
-          <li>In addition to VMEM, the Capacity Scheduler can also consider 
-          RAM on the TT node. RAM is considered the same way as VMEM. TTs report
-          the total RAM available on their node, and an offset. If both are
-          set, the Scheduler computes the available RAM on the node. Next, 
-          the Scheduler figures out the RAM requirements of the job, if any. 
-          As with VMEM, users can optionally specify a RAM limit for their job
-          (<code>mapred.task.maxpmem</code>, described in the MapReduce 
-          Tutorial). The Scheduler also maintains a limit for this value 
-          (<code>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</code>, 
-          described below). All these three values must be set for the 
-          Scheduler to schedule tasks based on RAM constraints.
-          </li>
-          <li>The Scheduler ensures that jobs cannot ask for RAM or VMEM higher
-          than configured limits. If this happens, the job is failed when it
-          is submitted. 
-          </li>
-        </ol>
-        
-        <p>As described above, the additional scheduler-based config 
-        parameters are as follows:</p>
-
-        <table>
-          <tr><th>Name</th><th>Description</th></tr>
-          <tr><td>mapred.capacity-scheduler.task.default-pmem-percentage-in-vmem</td>
-          	<td>A percentage of the default VMEM limit for jobs
-          	(<code>mapred.task.default.maxvmem</code>). This is the default 
-          	RAM task-limit associated with a task. Unless overridden by a 
-          	job's setting, this number defines the RAM task-limit.</td>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.minimum-user-limit-percent</td>
+          	<td>Each queue enforces a limit on the percentage of resources 
+          	allocated to a user at any given time, if there is competition 
+          	for them. This user limit can vary between a minimum and maximum 
+          	value. The former depends on the number of users who have submitted
+          	jobs, and the latter is set to this property value. For example, 
+          	suppose the value of this property is 25. If two users have 
+          	submitted jobs to a queue, no single user can use more than 50% 
+          	of the queue resources. If a third user submits a job, no single 
+          	user can use more than 33% of the queue resources. With 4 or more 
+          	users, no user can use more than 25% of the queue's resources. A 
+          	value of 100 implies no user limits are imposed.</td>
+          </tr>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.user-limit-factor</td>
+            <td>The multiple of the queue capacity which can be configured to 
+              allow a single user to acquire more slots. By default this is set 
+              to 1 which ensure that a single user can never take more than the 
+              queue's configured capacity irrespective of how idle th cluster 
+              is.</td>
           </tr>
           </tr>
-          <tr><td>mapred.capacity-scheduler.task.limit.maxpmem</td>
-          <td>Configuration which provides an upper limit to maximum physical
-           memory which can be specified by a job. If a job requires more 
-           physical memory than what is specified in this limit then the same
-           is rejected.</td>
+          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.supports-priority</td>
+            <td>If true, priorities of jobs will be taken into account in scheduling 
+            decisions.</td>
           </tr>
           </tr>
         </table>
         </table>
-      </section>
+   </section>
    <section>
    <section>
-        <title>Job Initialization Parameters</title>
+        <title>Job initialization</title>
         <p>Capacity scheduler lazily initializes the jobs before they are
         <p>Capacity scheduler lazily initializes the jobs before they are
         scheduled, for reducing the memory footprint on jobtracker. 
         scheduled, for reducing the memory footprint on jobtracker. 
-        Following are the parameters, by which you can control the laziness
-        of the job initialization. The following parameters can be 
-        configured in capacity-scheduler.xml
+        Following are the parameters by which you can control the
+        initialization of jobs per-queue.
         </p>
         </p>
         
         
         <table>
         <table>
           <tr><th>Name</th><th>Description</th></tr>
           <tr><th>Name</th><th>Description</th></tr>
           <tr>
           <tr>
             <td>
             <td>
-              mapred.capacity-scheduler.queue.&lt;queue-name&gt;.maximum-initialized-jobs-per-user
+              mapred.capacity-scheduler.maximum-system-jobs
             </td>
             </td>
             <td>
             <td>
-              Maximum number of jobs which are allowed to be pre-initialized for
-              a particular user in the queue. Once a job is scheduled, i.e. 
-              it starts running, then that job is not considered
-              while scheduler computes the maximum job a user is allowed to
-              initialize. 
+              Maximum number of jobs in the system which can be initialized,
+              concurrently, by the CapacityScheduler.
+              
+              Individual queue limits on initialized jobs are directly 
+              proportional to their queue capacities.
             </td>
             </td>
           </tr>
           </tr>
           <tr>
           <tr>
             <td>
             <td>
-              mapred.capacity-scheduler.init-poll-interval
+              mapred.capacity-scheduler.queue.&lt;queue-name&gt;.maximum-initialized-active-tasks
             </td>
             </td>
             <td>
             <td>
-              Amount of time in miliseconds which is used to poll the scheduler
-              job queue to look for jobs to be initialized.
+              The maximum number of tasks, across all jobs in the queue, 
+              which can be initialized concurrently. Once the queue's jobs 
+              exceed this limit they will be queued on disk.             
             </td>
             </td>
           </tr>
           </tr>
           <tr>
           <tr>
             <td>
             <td>
-              mapred.capacity-scheduler.init-worker-threads
+              mapred.capacity-scheduler.queue.&lt;queue-name&gt;.maximum-initialized-active-tasks-per-user
             </td>
             </td>
             <td>
             <td>
-              Number of worker threads which would be used by Initialization
-              poller to initialize jobs in a set of queue. If number mentioned 
-              in property is equal to number of job queues then a thread is 
-              assigned jobs from one queue. If the number configured is lesser than
-              number of queues, then a thread can get jobs from more than one queue
-              which it initializes in a round robin fashion. If the number configured
-              is greater than number of queues, then number of threads spawned
-              would be equal to number of job queues.
+              The maximum number of tasks per-user, across all the of the
+              user's jobs in the queue, which can be initialized concurrently. 
+              Once the user's jobs exceed this limit they will be queued on disk.
             </td>
             </td>
           </tr>
           </tr>
+          <tr>
+            <td> 
+              mapred.capacity-scheduler.queue.&lt;queue-name&gt;.init-accept-jobs-factor
+            </td>
+            <td>
+              The multipe of (maximum-system-jobs * queue-capacity) used to
+              determine the number of jobs which are accepted by the scheduler. 
+              The default value is 10. If number of jobs submitted to the queue
+              exceeds this limit, job submission are rejected. 
+            </td>
+          </tr> 
         </table>
         </table>
       </section>   
       </section>   
+      </section>
+      
       <section>
       <section>
-        <title>Reviewing the configuration of the Capacity Scheduler</title>
+        <title>Resource based scheduling</title>
+      
+        <p>The CapacityScheduler supports scheduling of tasks on a
+        <code>TaskTracker</code>(TT) based on a job's memory requirements
+        in terms of RAM and Virtual Memory (VMEM) on the TT node.
+        A TT is conceptually composed of a fixed number of map and reduce
+        slots with fixed slot size across the cluster. A job can ask for one
+        or more slots for each of its component map and/or reduce slots. If a
+        task consumes more memory than configured the TT forcibly kills the task.
+        </p>
+
+        <p>Currently the memory based scheduling is only supported
+        in Linux platform.</p>
+        
+        <p>Additional scheduler-based config 
+        parameters are as follows:</p>
+
+        <table>
+          <tr><th>Name</th><th>Description</th></tr>
+          <tr>
+            <td>mapred.cluster.map.memory.mb</td>
+          	 <td>The size, in terms of virtual memory, of a single map slot
+             in the Map-Reduce framework, used by the scheduler.
+             A job can ask for multiple slots for a single map task via
+             <code>mapred.job.map.memory.mb</code>, upto the limit specified by
+             <code>mapred.cluster.max.map.memory.mb</code>, if the scheduler 
+             supports the feature.
+             The value of -1 indicates that this feature is turned off.
+          	 </td>
+          </tr>
+          <tr>
+            <td>mapred.cluster.reduce.memory.mb</td>
+             <td>The size, in terms of virtual memory, of a single reduce slot
+             in the Map-Reduce framework, used by the scheduler.
+             A job can ask for multiple slots for a single reduce task via
+             <code>mapred.job.reduce.memory.mb</code>, up to the limit specified by
+             <code>mapred.cluster.max.reduce.memory.mb</code>, if the scheduler supports the 
+             feature. The value of -1 indicates that this feature is turned off.
+             </td>
+          </tr>
+          <tr>
+            <td>mapred.cluster.max.map.memory.mb</td>
+            <td>The maximum size, in terms of virtual memory, of a single map
+            task launched by the Map-Reduce framework, used by the scheduler.
+            A job can ask for multiple slots for a single map task via
+            <code>mapred.job.map.memory.mb</code>, up to the limit specified by
+            <code>mapred.cluster.max.map.memory.mb</code>, if the scheduler supports the 
+            feature. The value of -1 indicates that this feature is turned off.
+            </td>
+          </tr>
+          <tr>
+            <td>mapred.cluster.max.reduce.memory.mb</td>
+            <td>The maximum size, in terms of virtual memory, of a single reduce
+            task launched by the Map-Reduce framework, used by the scheduler.
+            A job can ask for multiple slots for a single reduce task via
+            <code>mapred.job.reduce.memory.mb</code>, up to the limit specified by
+            <code>mapred.cluster.max.reduce.memory.mb</code>, if the scheduler supports the 
+            feature. The value of -1 indicates that this feature is turned off.
+            </td>
+          </tr>
+          <tr>
+            <td>mapred.job.map.memory.mb</td>
+            <td>The size, in terms of virtual memory, of a single map task
+            for the job. A job can ask for multiple slots for a single map task, 
+            rounded up to the next multiple of <code>mapred.cluster.map.memory.mb</code> and 
+            up to the limit specified by <code>mapred.cluster.max.map.memory.mb</code>, 
+            if the scheduler supports the feature. The value of -1 indicates 
+            that this feature is turned off iff <code>mapred.cluster.map.memory.mb</code> is 
+            also turned off (-1).</td>
+          </tr>
+          <tr>
+            <td>mapred.job.reduce.memory.mb</td>
+            <td>The size, in terms of virtual memory, of a single reduce task
+            for the job. A job can ask for multiple slots for a single reduce task, 
+            rounded up to the next multiple of <code>mapred.cluster.reduce.memory.mb</code> and 
+            up to the limit specified by <code>mapred.cluster.max.reduce.memory.mb</code>, 
+            if the scheduler supports the feature. The value of -1 indicates 
+            that this feature is turned off iff <code>mapred.cluster.reduce.memory.mb</code> is 
+            also turned off (-1).</td>
+          </tr>
+        </table>
+      </section>
+      
+      <section>
+        <title>Reviewing the configuration of the CapacityScheduler</title>
         <p>
         <p>
           Once the installation and configuration is completed, you can review
           Once the installation and configuration is completed, you can review
           it after starting the MapReduce cluster from the admin UI.
           it after starting the MapReduce cluster from the admin UI.
@@ -370,10 +420,218 @@
               Information</em> section of the page.</li>
               Information</em> section of the page.</li>
           <li>The properties for the queues should be visible in the <em>Scheduling
           <li>The properties for the queues should be visible in the <em>Scheduling
               Information</em> column against each queue.</li>
               Information</em> column against each queue.</li>
+          <li>The /scheduler web-page should show the resource usage of 
+              individual queues.</li>
         </ul>
         </ul>
       </section>
       </section>
       
       
    </section>
    </section>
+
+  <section>
+    <title>Example</title>
+    <p>Here is a practical example of using the CapacityScheduler:</p>
+    <table>
+    <tr>
+    <td>
+<code>&lt;?xml version="1.0"?&gt;</code><br/>
+<br/>
+<code>&lt;configuration&gt;</code><br/>
+<br/>
+<code>  &lt;!-- system limit, across all queues --&gt;</code><br/>
+<br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.maximum-system-jobs&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;3000&lt;/value&gt;</code><br/>
+<code>    &lt;description&gt;Maximum number of jobs in the system which can be initialized,</code><br/>
+<code>     concurrently, by the CapacityScheduler.</code><br/>
+<code>    &lt;/description&gt;    </code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code> </code><br/>
+<code>&lt;!-- queue: queueA --&gt;</code><br/>
+<code> &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;8&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueA.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;!-- queue: queueB --&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;2&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueB.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;!-- queue: queueC --&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;30&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueC.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;!-- queue: queueD --&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueD.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;!-- queue: queueE --&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;31&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueE.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;!-- queue: queueF --&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.capacity&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;28&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.supports-priority&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;false&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.minimum-user-limit-percent&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;20&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.user-limit-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.maximum-initialized-active-tasks&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;200000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.maximum-initialized-active-tasks-per-user&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;100000&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<code>  &lt;property&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;name&gt;mapred.capacity-scheduler.queue.queueF.init-accept-jobs-factor&lt;/name&gt;</code><br/>
+<code>    &nbsp;&nbsp;&lt;value&gt;10&lt;/value&gt;</code><br/>
+<code>  &lt;/property&gt;</code><br/>
+<br/>
+<code>&lt;/configuration&gt;</code><br/>
+    </td>
+    </tr>
+    </table>
+  </section>
   </body>
   </body>
   
   
 </document>
 </document>
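To connect the example configuration above to a client: with maximum-system-jobs set to 3000, queueA (capacity 8, init-accept-jobs-factor 100) accepts at most 3000 * 0.08 * 100 = 24,000 submitted jobs, of which only a capacity-proportional share is initialized at any time. A hedged client-side sketch that submits into queueA and asks for larger-than-default task memory (the 3072 MB figures are illustrative, not taken from the commit):

import org.apache.hadoop.mapred.JobConf;

public class QueueAClientSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setQueueName("queueA");   // must be one of the queues in mapred.queue.names
    // Rounded up by the scheduler to a multiple of mapred.cluster.map.memory.mb
    // and capped by mapred.cluster.max.map.memory.mb (likewise for reduces).
    job.setLong("mapred.job.map.memory.mb", 3072);
    job.setLong("mapred.job.reduce.memory.mb", 3072);
    // ... set mapper, reducer, input and output paths, then JobClient.runJob(job) ...
  }
}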

+ 2 - 2
src/docs/src/documentation/content/xdocs/fair_scheduler.xml

@@ -93,7 +93,7 @@
       <p>
       <p>
         To run the fair scheduler in your Hadoop installation, you need to put
         To run the fair scheduler in your Hadoop installation, you need to put
         it on the CLASSPATH. The easiest way is to copy the 
         it on the CLASSPATH. The easiest way is to copy the 
-        <em>hadoop-*-fairscheduler.jar</em> from
+        <em>hadoop-fairscheduler-*.jar</em> from
         <em>HADOOP_HOME/contrib/fairscheduler</em> to <em>HADOOP_HOME/lib</em>.
         <em>HADOOP_HOME/contrib/fairscheduler</em> to <em>HADOOP_HOME/lib</em>.
         Alternatively you can modify <em>HADOOP_CLASSPATH</em> to include this jar, in
         Alternatively you can modify <em>HADOOP_CLASSPATH</em> to include this jar, in
         <em>HADOOP_CONF_DIR/hadoop-env.sh</em>
         <em>HADOOP_CONF_DIR/hadoop-env.sh</em>
@@ -101,7 +101,7 @@
       <p>
       <p>
         In order to compile fair scheduler, from sources execute <em> ant 
         In order to compile fair scheduler, from sources execute <em> ant 
         package</em> in source folder and copy the 
         package</em> in source folder and copy the 
-        <em>build/contrib/fair-scheduler/hadoop-*-fairscheduler.jar</em> 
+        <em>build/contrib/fair-scheduler/hadoop-fairscheduler-*.jar</em> 
         to <em>HADOOP_HOME/lib</em>
         to <em>HADOOP_HOME/lib</em>
       </p>
       </p>
       <p>
       <p>

+ 2 - 2
src/docs/src/documentation/content/xdocs/single_node_setup.xml

@@ -146,7 +146,7 @@
         <code>$ mkdir input</code><br/>
         <code>$ mkdir input</code><br/>
         <code>$ cp conf/*.xml input</code><br/>
         <code>$ cp conf/*.xml input</code><br/>
         <code>
         <code>
-          $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+          $ bin/hadoop jar hadoop-examples-*.jar grep input output 'dfs[a-z.]+'
         </code><br/>
         </code><br/>
         <code>$ cat output/*</code>
         <code>$ cat output/*</code>
       </p>
       </p>
@@ -252,7 +252,7 @@
         <p>
         <p>
           Run some of the examples provided:<br/>
           Run some of the examples provided:<br/>
           <code>
           <code>
-            $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+            $ bin/hadoop jar hadoop-examples-*.jar grep input output 'dfs[a-z.]+'
           </code>
           </code>
         </p>
         </p>
         
         

+ 1 - 1
src/examples/org/apache/hadoop/examples/AggregateWordCount.java

@@ -34,7 +34,7 @@ import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
  * text input files, breaks each line into words and counts them. The output is
  * text input files, breaks each line into words and counts them. The output is
  * a locally sorted list of words and the count of how often they occurred.
  * a locally sorted list of words and the count of how often they occurred.
  * 
  * 
- * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordcount <i>in-dir</i>
+ * To run: bin/hadoop jar hadoop-examples-*.jar aggregatewordcount <i>in-dir</i>
  * <i>out-dir</i> <i>numOfReducers</i> textinputformat
  * <i>out-dir</i> <i>numOfReducers</i> textinputformat
  * 
  * 
  */
  */

+ 1 - 1
src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java

@@ -32,7 +32,7 @@ import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
  * This is an example Aggregated Hadoop Map/Reduce application. Computes the
  * This is an example Aggregated Hadoop Map/Reduce application. Computes the
  * histogram of the words in the input texts.
  * histogram of the words in the input texts.
  * 
  * 
- * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordhist <i>in-dir</i>
+ * To run: bin/hadoop jar hadoop-examples-*.jar aggregatewordhist <i>in-dir</i>
  * <i>out-dir</i> <i>numOfReducers</i> textinputformat
  * <i>out-dir</i> <i>numOfReducers</i> textinputformat
  * 
  * 
  */
  */

+ 2 - 2
src/examples/org/apache/hadoop/examples/dancing/package.html

@@ -59,8 +59,8 @@ Both applications have been added to the examples jar, so they can be
 run as:
 run as:
 
 
 <pre>
 <pre>
-bin/hadoop jar hadoop-*-examples.jar pentomino pent-outdir
-bin/hadoop jar hadoop-*-examples.jar sudoku puzzle.txt
+bin/hadoop jar hadoop-examples-*.jar pentomino pent-outdir
+bin/hadoop jar hadoop-examples-*.jar sudoku puzzle.txt
 </pre>
 </pre>
 
 
 <p>
 <p>

+ 1 - 1
src/examples/org/apache/hadoop/examples/terasort/TeraGen.java

@@ -55,7 +55,7 @@ import org.apache.hadoop.util.ToolRunner;
  *
  *
  * <p>
  * <p>
  * To run the program: 
  * To run the program: 
- * <b>bin/hadoop jar hadoop-*-examples.jar teragen 10000000000 in-dir</b>
+ * <b>bin/hadoop jar hadoop-examples-*.jar teragen 10000000000 in-dir</b>
  */
  */
 public class TeraGen extends Configured implements Tool {
 public class TeraGen extends Configured implements Tool {
 
 

+ 1 - 1
src/examples/org/apache/hadoop/examples/terasort/TeraSort.java

@@ -45,7 +45,7 @@ import org.apache.hadoop.util.ToolRunner;
  * finish. 
  * finish. 
  * <p>
  * <p>
  * To run the program: 
  * To run the program: 
- * <b>bin/hadoop jar hadoop-*-examples.jar terasort in-dir out-dir</b>
+ * <b>bin/hadoop jar hadoop-examples-*.jar terasort in-dir out-dir</b>
  */
  */
 public class TeraSort extends Configured implements Tool {
 public class TeraSort extends Configured implements Tool {
   private static final Log LOG = LogFactory.getLog(TeraSort.class);
   private static final Log LOG = LogFactory.getLog(TeraSort.class);

+ 1 - 1
src/examples/org/apache/hadoop/examples/terasort/TeraValidate.java

@@ -44,7 +44,7 @@ import org.apache.hadoop.util.ToolRunner;
  * Any output from the reduce is problem report.
  * Any output from the reduce is problem report.
  * <p>
  * <p>
  * To run the program: 
  * To run the program: 
- * <b>bin/hadoop jar hadoop-*-examples.jar teravalidate out-dir report-dir</b>
+ * <b>bin/hadoop jar hadoop-examples-*.jar teravalidate out-dir report-dir</b>
  * <p>
  * <p>
  * If there is any output, something is wrong and the output of the reduce
  * If there is any output, something is wrong and the output of the reduce
  * will have the problem report.
  * will have the problem report.

+ 32 - 9
src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java

@@ -20,12 +20,12 @@ package org.apache.hadoop.mapred.lib;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.List;
 import java.util.HashMap;
 import java.util.HashMap;
 import java.util.Set;
 import java.util.Set;
 import java.util.Iterator;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Map;
-import java.util.Map.Entry;
 
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.FileUtil;
@@ -73,6 +73,9 @@ public abstract class CombineFileInputFormat<K, V>
   // across multiple pools.
   // across multiple pools.
   private ArrayList<MultiPathFilter> pools = new  ArrayList<MultiPathFilter>();
   private ArrayList<MultiPathFilter> pools = new  ArrayList<MultiPathFilter>();
 
 
+  // mapping from a rack name to the set of Nodes in the rack 
+  private static HashMap<String, Set<String>> rackToNodes = 
+                            new HashMap<String, Set<String>>();
   /**
   /**
    * Specify the maximum size (in bytes) of each split. Each split is
    * Specify the maximum size (in bytes) of each split. Each split is
    * approximately equal to the specified size.
    * approximately equal to the specified size.
@@ -214,6 +217,8 @@ public abstract class CombineFileInputFormat<K, V>
     getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), 
     getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), 
                   maxSize, minSizeNode, minSizeRack, splits);
                   maxSize, minSizeNode, minSizeRack, splits);
 
 
+    // free up rackToNodes map
+    rackToNodes.clear();
     return splits.toArray(new CombineFileSplit[splits.size()]);    
     return splits.toArray(new CombineFileSplit[splits.size()]);    
   }
   }
 
 
@@ -341,7 +346,7 @@ public abstract class CombineFileInputFormat<K, V>
             // create this split.
             // create this split.
             if (maxSize != 0 && curSplitSize >= maxSize) {
             if (maxSize != 0 && curSplitSize >= maxSize) {
               // create an input split and add it to the splits array
               // create an input split and add it to the splits array
-              addCreatedSplit(job, splits, racks, validBlocks);
+              addCreatedSplit(job, splits, getHosts(racks), validBlocks);
               createdSplit = true;
               createdSplit = true;
               break;
               break;
             }
             }
@@ -360,7 +365,7 @@ public abstract class CombineFileInputFormat<K, V>
           if (minSizeRack != 0 && curSplitSize >= minSizeRack) {
           if (minSizeRack != 0 && curSplitSize >= minSizeRack) {
             // if there is a mimimum size specified, then create a single split
             // if there is a mimimum size specified, then create a single split
             // otherwise, store these blocks into overflow data structure
             // otherwise, store these blocks into overflow data structure
-            addCreatedSplit(job, splits, racks, validBlocks);
+            addCreatedSplit(job, splits, getHosts(racks), validBlocks);
           } else {
           } else {
             // There were a few blocks in this rack that remained to be processed.
             // There were a few blocks in this rack that remained to be processed.
             // Keep them in 'overflow' block list. These will be combined later.
             // Keep them in 'overflow' block list. These will be combined later.
@@ -393,7 +398,7 @@ public abstract class CombineFileInputFormat<K, V>
       // create this split.
       // create this split.
       if (maxSize != 0 && curSplitSize >= maxSize) {
       if (maxSize != 0 && curSplitSize >= maxSize) {
         // create an input split and add it to the splits array
         // create an input split and add it to the splits array
-        addCreatedSplit(job, splits, racks, validBlocks);
+        addCreatedSplit(job, splits, getHosts(racks), validBlocks);
         curSplitSize = 0;
         curSplitSize = 0;
         validBlocks.clear();
         validBlocks.clear();
         racks.clear();
         racks.clear();
@@ -402,7 +407,7 @@ public abstract class CombineFileInputFormat<K, V>
 
 
     // Process any remaining blocks, if any.
     // Process any remaining blocks, if any.
     if (!validBlocks.isEmpty()) {
     if (!validBlocks.isEmpty()) {
-      addCreatedSplit(job, splits, racks, validBlocks);
+      addCreatedSplit(job, splits, getHosts(racks), validBlocks);
     }
     }
   }
   }
 
 
@@ -412,13 +417,12 @@ public abstract class CombineFileInputFormat<K, V>
    */
    */
   private void addCreatedSplit(JobConf job,
   private void addCreatedSplit(JobConf job,
                                List<CombineFileSplit> splitList, 
                                List<CombineFileSplit> splitList, 
-                               List<String> racks, 
+                               List<String> locations, 
                                ArrayList<OneBlockInfo> validBlocks) {
                                ArrayList<OneBlockInfo> validBlocks) {
     // create an input split
     // create an input split
     Path[] fl = new Path[validBlocks.size()];
     Path[] fl = new Path[validBlocks.size()];
     long[] offset = new long[validBlocks.size()];
     long[] offset = new long[validBlocks.size()];
     long[] length = new long[validBlocks.size()];
     long[] length = new long[validBlocks.size()];
-    String[] rackLocations = racks.toArray(new String[racks.size()]);
     for (int i = 0; i < validBlocks.size(); i++) {
     for (int i = 0; i < validBlocks.size(); i++) {
       fl[i] = validBlocks.get(i).onepath; 
       fl[i] = validBlocks.get(i).onepath; 
       offset[i] = validBlocks.get(i).offset;
       offset[i] = validBlocks.get(i).offset;
@@ -427,7 +431,7 @@ public abstract class CombineFileInputFormat<K, V>
 
 
      // add this split to the list that is returned
      // add this split to the list that is returned
     CombineFileSplit thissplit = new CombineFileSplit(job, fl, offset, 
     CombineFileSplit thissplit = new CombineFileSplit(job, fl, offset, 
-                                                      length, rackLocations);
+                                   length, locations.toArray(new String[0]));
     splitList.add(thissplit); 
     splitList.add(thissplit); 
   }
   }
 
 
@@ -484,7 +488,9 @@ public abstract class CombineFileInputFormat<K, V>
               rackToBlocks.put(rack, blklist);
               rackToBlocks.put(rack, blklist);
             }
             }
             blklist.add(oneblock);
             blklist.add(oneblock);
-          }
+            // Add this host to rackToNodes map
+            addHostToRack(oneblock.racks[j], oneblock.hosts[j]);
+         }
 
 
           // add this block to the node --> block map
           // add this block to the node --> block map
           for (int j = 0; j < oneblock.hosts.length; j++) {
           for (int j = 0; j < oneblock.hosts.length; j++) {
@@ -547,6 +553,23 @@ public abstract class CombineFileInputFormat<K, V>
     }
     }
   }
   }
 
 
+  private static void addHostToRack(String rack, String host) {
+    Set<String> hosts = rackToNodes.get(rack);
+    if (hosts == null) {
+      hosts = new HashSet<String>();
+      rackToNodes.put(rack, hosts);
+    }
+    hosts.add(host);
+  }
+  
+  private static List<String> getHosts(List<String> racks) {
+    List<String> hosts = new ArrayList<String>();
+    for (String rack : racks) {
+      hosts.addAll(rackToNodes.get(rack));
+    }
+    return hosts;
+  }
+  
   /**
   /**
    * Accept a path only if any one of filters given in the
    * Accept a path only if any one of filters given in the
    * constructor do. 
    * constructor do. 
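
The CombineFileInputFormat change above swaps rack names for host names in split locations: while block locations are enumerated, each (rack, host) pair is recorded in a rack-to-nodes map, and getHosts() expands a split's rack list into the concrete hosts before the CombineFileSplit is constructed. A minimal, self-contained sketch of that bookkeeping follows; the class name, host names and main() driver are illustrative only and not part of the patch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Illustrative sketch only: mirrors the rack-to-host bookkeeping added above.
public class RackToHostsSketch {
  // rack name -> hosts observed in that rack while block locations are scanned
  private static final Map<String, Set<String>> rackToNodes =
      new HashMap<String, Set<String>>();

  static void addHostToRack(String rack, String host) {
    Set<String> hosts = rackToNodes.get(rack);
    if (hosts == null) {
      hosts = new HashSet<String>();
      rackToNodes.put(rack, hosts);
    }
    hosts.add(host);
  }

  // Expand a list of rack names into the hosts they contain, so split
  // locations are node names rather than rack strings like "/r1".
  static List<String> getHosts(List<String> racks) {
    List<String> hosts = new ArrayList<String>();
    for (String rack : racks) {
      Set<String> nodes = rackToNodes.get(rack);
      if (nodes != null) {
        hosts.addAll(nodes);
      }
    }
    return hosts;
  }

  public static void main(String[] args) {
    addHostToRack("/r1", "host1.example.com");   // placeholder hosts
    addHostToRack("/r1", "host2.example.com");
    addHostToRack("/r2", "host3.example.com");
    System.out.println(getHosts(Arrays.asList("/r1")));  // prints the /r1 hosts
  }
}

The corresponding test changes further down (TestCombineFileInputFormat) assert against hosts1[0], hosts2[0] and hosts3[0] instead of the literal rack strings for the same reason.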

+ 240 - 0
src/test/org/apache/hadoop/fs/TestHarFileSystem.java

@@ -0,0 +1,240 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.tools.HadoopArchives;
+import org.apache.hadoop.util.ToolRunner;
+
+import junit.framework.TestCase;
+
+/**
+ * test the har file system
+ * create a har filesystem
+ * run fs commands
+ * and then run a map reduce job
+ */
+public class TestHarFileSystem extends TestCase {
+  private Path inputPath;
+  private MiniDFSCluster dfscluster;
+  private MiniMRCluster mapred;
+  private FileSystem fs;
+  private Path filea, fileb, filec, filed;
+  private Path archivePath;
+  
+  protected void setUp() throws Exception {
+    super.setUp();
+    dfscluster = new MiniDFSCluster(new JobConf(), 2, true, null);
+    fs = dfscluster.getFileSystem();
+    mapred = new MiniMRCluster(2, fs.getUri().toString(), 1);
+    inputPath = new Path(fs.getHomeDirectory(), "test"); 
+    filea = new Path(inputPath,"a");
+    fileb = new Path(inputPath,"b");
+    filec = new Path(inputPath,"c");
+    // check for a har containing escape-worthy characters
+    // in their names
+    filed = new Path(inputPath, "d%d");
+    archivePath = new Path(fs.getHomeDirectory(), "tmp");
+  }
+  
+  protected void tearDown() throws Exception {
+    try {
+      if (mapred != null) {
+        mapred.shutdown();
+      }
+      if (dfscluster != null) {
+        dfscluster.shutdown();
+      }
+    } catch(Exception e) {
+      System.err.println(e);
+    }
+    super.tearDown();
+  }
+  
+  static class TextMapperReducer implements Mapper<LongWritable, Text, Text, Text>, 
+            Reducer<Text, Text, Text, Text> {
+    
+    public void configure(JobConf conf) {
+      //do nothing 
+    }
+
+    public void map(LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+      output.collect(value, new Text(""));
+    }
+
+    public void close() throws IOException {
+      // do nothing
+    }
+
+    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
+      while(values.hasNext()) { 
+        values.next();
+        output.collect(key, null);
+      }
+    }
+  }
+  
+  public void testArchives() throws Exception {
+    fs.mkdirs(inputPath);
+    
+    FSDataOutputStream out = fs.create(filea); 
+    out.write("a".getBytes());
+    out.close();
+    out = fs.create(fileb);
+    out.write("b".getBytes());
+    out.close();
+    out = fs.create(filec);
+    out.write("c".getBytes());
+    out.close();
+    out = fs.create(filed);
+    out.write("d".getBytes());
+    out.close();
+    Configuration conf = mapred.createJobConf();
+    
+    // check to see if fs.har.impl.disable.cache is true
+    boolean archivecaching = conf.getBoolean("fs.har.impl.disable.cache", false);
+    assertTrue(archivecaching);
+    HadoopArchives har = new HadoopArchives(conf);
+    String[] args = new String[3];
+    // check for destination not specified
+    args[0] = "-archiveName";
+    args[1] = "foo.har";
+    args[2] = inputPath.toString();
+    int ret = ToolRunner.run(har, args);
+    assertTrue(ret != 0);
+    args = new String[4];
+    //check for wrong archiveName
+    args[0] = "-archiveName";
+    args[1] = "/d/foo.har";
+    args[2] = inputPath.toString();
+    args[3] = archivePath.toString();
+    ret = ToolRunner.run(har, args);
+    assertTrue(ret != 0);
+    // see if dest is a file
+    args[1] = "foo.har";
+    args[3] = filec.toString();
+    ret = ToolRunner.run(har, args);
+    assertTrue(ret != 0);
+    //this is a valid run
+    args[0] = "-archiveName";
+    args[1] = "foo.har";
+    args[2] = inputPath.toString();
+    args[3] = archivePath.toString();
+    ret = ToolRunner.run(har, args);
+    // check for the existence of the archive
+    assertTrue(ret == 0);
+    // try running it again; it should not
+    // overwrite the directory
+    ret = ToolRunner.run(har, args);
+    assertTrue(ret != 0);
+    Path finalPath = new Path(archivePath, "foo.har");
+    Path fsPath = new Path(inputPath.toUri().getPath());
+    String relative = fsPath.toString().substring(1);
+    Path filePath = new Path(finalPath, relative);
+    //make it a har path 
+    Path harPath = new Path("har://" + filePath.toUri().getPath());
+    assertTrue(fs.exists(new Path(finalPath, "_index")));
+    assertTrue(fs.exists(new Path(finalPath, "_masterindex")));
+    assertTrue(!fs.exists(new Path(finalPath, "_logs")));
+    //creation tested
+    //check if the archive is same
+    // do ls and cat on all the files
+    FsShell shell = new FsShell(conf);
+    args = new String[2];
+    args[0] = "-ls";
+    args[1] = harPath.toString();
+    ret = ToolRunner.run(shell, args);
+    // ls should work.
+    assertTrue((ret == 0));
+    //now check for contents of filea
+    // fileb and filec
+    Path harFilea = new Path(harPath, "a");
+    Path harFileb = new Path(harPath, "b");
+    Path harFilec = new Path(harPath, "c");
+    Path harFiled = new Path(harPath, "d%d");
+    FileSystem harFs = harFilea.getFileSystem(conf);
+    FSDataInputStream fin = harFs.open(harFilea);
+    byte[] b = new byte[4];
+    int readBytes = fin.read(b);
+    fin.close();
+    assertTrue("strings are equal ", (b[0] == "a".getBytes()[0]));
+    fin = harFs.open(harFileb);
+    fin.read(b);
+    fin.close();
+    assertTrue("strings are equal ", (b[0] == "b".getBytes()[0]));
+    fin = harFs.open(harFilec);
+    fin.read(b);
+    fin.close();
+    assertTrue("strings are equal ", (b[0] == "c".getBytes()[0]));
+    fin = harFs.open(harFiled);
+    fin.read(b);
+    fin.close();
+    assertTrue("strings are equal ", (b[0] == "d".getBytes()[0]));
+    
+    // ok all files match 
+    // run a map reduce job
+    Path outdir = new Path(fs.getHomeDirectory(), "mapout"); 
+    JobConf jobconf = mapred.createJobConf();
+    FileInputFormat.addInputPath(jobconf, harPath);
+    jobconf.setInputFormat(TextInputFormat.class);
+    jobconf.setOutputFormat(TextOutputFormat.class);
+    FileOutputFormat.setOutputPath(jobconf, outdir);
+    jobconf.setMapperClass(TextMapperReducer.class);
+    jobconf.setMapOutputKeyClass(Text.class);
+    jobconf.setMapOutputValueClass(Text.class);
+    jobconf.setReducerClass(TextMapperReducer.class);
+    jobconf.setNumReduceTasks(1);
+    JobClient.runJob(jobconf);
+    args[1] = outdir.toString();
+    ret = ToolRunner.run(shell, args);
+    
+    FileStatus[] status = fs.globStatus(new Path(outdir, "part*"));
+    Path reduceFile = status[0].getPath();
+    FSDataInputStream reduceIn = fs.open(reduceFile);
+    b = new byte[8];
+    reduceIn.read(b);
+    //assuming all the 8 bytes were read.
+    Text readTxt = new Text(b);
+    assertTrue("a\nb\nc\nd\n".equals(readTxt.toString()));
+    assertTrue("number of bytes left should be -1", reduceIn.read(b) == -1);
+    reduceIn.close();
+  }
+}
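
The har:// path the test builds is simply the archive directory's path on the underlying filesystem with the har scheme prepended, followed by the absolute path of the archived file. Below is a minimal read-only sketch of the same access pattern, assuming an archive was already created as in the test; the user name and paths are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Illustrative sketch only: read one file out of an existing archive
// through the har:// scheme. Paths below are placeholders.
public class HarReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // foo.har created under /user/hadoop/tmp from /user/hadoop/test,
    // mirroring the layout used by the test above.
    Path inside = new Path("har:///user/hadoop/tmp/foo.har/user/hadoop/test/a");
    FileSystem harFs = inside.getFileSystem(conf);
    FSDataInputStream in = harFs.open(inside);
    byte[] buf = new byte[4];
    int n = in.read(buf);
    in.close();
    System.out.println("read " + n + " byte(s): " + new String(buf, 0, Math.max(n, 0)));
  }
}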

+ 164 - 0
src/test/org/apache/hadoop/io/compress/TestCodec.java

@@ -19,13 +19,22 @@ package org.apache.hadoop.io.compress;
 
 
 import java.io.BufferedInputStream;
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
 import java.util.Arrays;
 import java.util.Arrays;
 import java.util.Random;
 import java.util.Random;
+import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 import java.util.zip.GZIPOutputStream;
 
 
 import junit.framework.TestCase;
 import junit.framework.TestCase;
@@ -45,8 +54,11 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.compress.CompressorStream;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
 import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor;
 import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
 import org.apache.hadoop.io.compress.zlib.ZlibFactory;
 import org.apache.hadoop.io.compress.zlib.ZlibFactory;
@@ -447,4 +459,156 @@ public class TestCodec extends TestCase {
     super(name);
     super(name);
   }
   }
 
 
+  public void testCodecPoolAndGzipDecompressor() {
+    // BuiltInZlibInflater should not be used as the GzipCodec decompressor.
+    // Assert that this is the case.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // This should give us a BuiltInZlibInflater.
+    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+    assertTrue("ZlibFactory returned unexpected inflator",
+        zlibDecompressor instanceof BuiltInZlibInflater);
+
+    // its createOutputStream() just wraps the existing stream in a
+    // java.util.zip.GZIPOutputStream.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+    assertTrue("Codec for .gz file is not GzipCodec", 
+               codec instanceof GzipCodec);
+
+    // make sure we don't get a null decompressor
+    Decompressor codecDecompressor = codec.createDecompressor();
+    if (null == codecDecompressor) {
+      fail("Got null codecDecompressor");
+    }
+
+    // Asking the CodecPool for a decompressor for GzipCodec
+    // should not return null
+    Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
+    if (null == poolDecompressor) {
+      fail("Got null poolDecompressor");
+    }
+    // return a couple decompressors
+    CodecPool.returnDecompressor(zlibDecompressor);
+    CodecPool.returnDecompressor(poolDecompressor);
+    Decompressor poolDecompressor2 = CodecPool.getDecompressor(codec);
+    if (poolDecompressor.getClass() == BuiltInGzipDecompressor.class) {
+      if (poolDecompressor == poolDecompressor2) {
+        fail("Reused java gzip decompressor in pool");
+      }
+    } else {
+      if (poolDecompressor != poolDecompressor2) {
+        fail("Did not reuse native gzip decompressor in pool");
+      }
+    }
+  }
+
+  public void testGzipCodecRead() throws IOException {
+    // Create a gzipped file and try to read it back, using a decompressor
+    // from the CodecPool.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // Ensure that the CodecPool has a BuiltInZlibInflater in it.
+    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+    assertTrue("ZlibFactory returned unexpected inflator",
+        zlibDecompressor instanceof BuiltInZlibInflater);
+    CodecPool.returnDecompressor(zlibDecompressor);
+
+    // Now create a GZip text file.
+    String tmpDir = System.getProperty("test.build.data", "/tmp/");
+    Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
+    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+      new GZIPOutputStream(new FileOutputStream(f.toString()))));
+    final String msg = "This is the message in the file!";
+    bw.write(msg);
+    bw.close();
+
+    // Now read it back, using the CodecPool to establish the
+    // decompressor to use.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(f);
+    Decompressor decompressor = CodecPool.getDecompressor(codec);
+    FileSystem fs = FileSystem.getLocal(conf);
+    InputStream is = fs.open(f);
+    is = codec.createInputStream(is, decompressor);
+    BufferedReader br = new BufferedReader(new InputStreamReader(is));
+    String line = br.readLine();
+    assertEquals("Didn't get the same message back!", msg, line);
+    br.close();
+  }
+
+  private void verifyGzipFile(String filename, String msg) throws IOException {
+    BufferedReader r = new BufferedReader(new InputStreamReader(
+        new GZIPInputStream(new FileInputStream(filename))));
+    try {
+      String line = r.readLine();
+      assertEquals("Got invalid line back from " + filename, msg, line);
+    } finally {
+      r.close();
+      new File(filename).delete();
+    }
+  }
+
+  public void testGzipCodecWrite() throws IOException {
+    // Create a gzipped file using a compressor from the CodecPool,
+    // and try to read it back via the regular GZIPInputStream.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // Ensure that the CodecPool has a BuiltInZlibDeflater in it.
+    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
+    assertNotNull("zlibCompressor is null!", zlibCompressor);
+    assertTrue("ZlibFactory returned unexpected deflator",
+        zlibCompressor instanceof BuiltInZlibDeflater);
+    CodecPool.returnCompressor(zlibCompressor);
+
+    // Create a GZIP text file via the Compressor interface.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+    assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
+
+    final String msg = "This is the message we are going to compress.";
+    final String tmpDir = System.getProperty("test.build.data", "/tmp/");
+    final String fileName = new Path(new Path(tmpDir),
+        "testGzipCodecWrite.txt.gz").toString();
+
+    BufferedWriter w = null;
+    Compressor gzipCompressor = CodecPool.getCompressor(codec);
+    if (null != gzipCompressor) {
+      // If it gives us back a Compressor, we should be able to use this
+      // to write files we can then read back with Java's gzip tools.
+      OutputStream os = new CompressorStream(new FileOutputStream(fileName),
+          gzipCompressor);
+      w = new BufferedWriter(new OutputStreamWriter(os));
+      w.write(msg);
+      w.close();
+      CodecPool.returnCompressor(gzipCompressor);
+
+      verifyGzipFile(fileName, msg);
+    }
+
+    // Create a gzip text file via codec.getOutputStream().
+    w = new BufferedWriter(new OutputStreamWriter(
+        codec.createOutputStream(new FileOutputStream(fileName))));
+    w.write(msg);
+    w.close();
+
+    verifyGzipFile(fileName, msg);
+  }
 }
 }
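
The pool behaviour asserted above (the pure-Java gzip decompressor is handed out but never cached for reuse, while a native one is) does not change how callers use the pool; the usual borrow/use/return pattern still applies. A small sketch of that pattern, assuming a gzipped text file at a placeholder local path:

import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

// Illustrative sketch only: borrow a decompressor from the pool, stream the
// file through it, and return it. The input path is a placeholder.
public class GzipPoolReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path p = new Path("/tmp/sample.txt.gz");                  // placeholder input
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(p);
    Decompressor decomp = CodecPool.getDecompressor(codec);   // may be a gzip-specific, non-pooled instance
    try {
      InputStream in = codec.createInputStream(FileSystem.getLocal(conf).open(p), decomp);
      byte[] buf = new byte[4096];
      for (int n; (n = in.read(buf)) > 0; ) {
        System.out.write(buf, 0, n);
      }
      in.close();
    } finally {
      CodecPool.returnDecompressor(decomp);                   // safe even when the pool declines to cache it
    }
  }
}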

+ 12 - 22
src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java

@@ -18,11 +18,6 @@
 package org.apache.hadoop.mapred.lib;
 package org.apache.hadoop.mapred.lib;
 
 
 import java.io.IOException;
 import java.io.IOException;
-import java.io.DataOutputStream;
-import java.util.BitSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Random;
 
 
 import junit.framework.TestCase;
 import junit.framework.TestCase;
 
 
@@ -30,17 +25,12 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobConf;
@@ -151,14 +141,14 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r2");
+      assertEquals(fileSplit.getLocations()[0], hosts2[0]); // should be on r2
       fileSplit = (CombineFileSplit) splits[1];
       fileSplit = (CombineFileSplit) splits[1];
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r1");
+      assertEquals(fileSplit.getLocations()[0], hosts1[0]); // should be on r1
 
 
       // create another file on 3 datanodes and 3 racks.
       // create another file on 3 datanodes and 3 racks.
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
       dfs.startDataNodes(conf, 1, true, null, rack3, hosts3, null);
@@ -186,7 +176,7 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(2).getName(), file3.getName());
       assertEquals(fileSplit.getPath(2).getName(), file3.getName());
       assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
       assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
       assertEquals(fileSplit.getLength(2), BLOCKSIZE);
       assertEquals(fileSplit.getLength(2), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r3");
+      assertEquals(fileSplit.getLocations()[0], hosts3[0]); // should be on r3
       fileSplit = (CombineFileSplit) splits[1];
       fileSplit = (CombineFileSplit) splits[1];
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
@@ -196,14 +186,14 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r2");
+      assertEquals(fileSplit.getLocations()[0], hosts2[0]); // should be on r2
       fileSplit = (CombineFileSplit) splits[2];
       fileSplit = (CombineFileSplit) splits[2];
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r1");
+      assertEquals(fileSplit.getLocations()[0], hosts1[0]); // should be on r1
 
 
       // create file4 on all three racks
       // create file4 on all three racks
       Path file4 = new Path(dir4 + "/file4");
       Path file4 = new Path(dir4 + "/file4");
@@ -229,7 +219,7 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(2).getName(), file3.getName());
       assertEquals(fileSplit.getPath(2).getName(), file3.getName());
       assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
       assertEquals(fileSplit.getOffset(2), 2 * BLOCKSIZE);
       assertEquals(fileSplit.getLength(2), BLOCKSIZE);
       assertEquals(fileSplit.getLength(2), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r3");
+      assertEquals(fileSplit.getLocations()[0], hosts3[0]); // should be on r3
       fileSplit = (CombineFileSplit) splits[1];
       fileSplit = (CombineFileSplit) splits[1];
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
@@ -239,14 +229,14 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getPath(1).getName(), file2.getName());
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getOffset(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
       assertEquals(fileSplit.getLength(1), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r2");
+      assertEquals(fileSplit.getLocations()[0], hosts2[0]); // should be on r2
       fileSplit = (CombineFileSplit) splits[2];
       fileSplit = (CombineFileSplit) splits[2];
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r1");
+      assertEquals(fileSplit.getLocations()[0], hosts1[0]); // should be on r1
 
 
       // maximum split size is 2 blocks 
       // maximum split size is 2 blocks 
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -385,7 +375,7 @@ public class TestCombineFileInputFormat extends TestCase{
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getPath(0).getName(), file1.getName());
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getOffset(0), 0);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
       assertEquals(fileSplit.getLength(0), BLOCKSIZE);
-      assertEquals(fileSplit.getLocations()[0], "/r1");
+      assertEquals(fileSplit.getLocations()[0], hosts1[0]); // should be on r1
 
 
       // maximum split size is 7 blocks and min is 3 blocks
       // maximum split size is 7 blocks and min is 3 blocks
       inFormat = new DummyInputFormat();
       inFormat = new DummyInputFormat();
@@ -431,15 +421,15 @@ public class TestCombineFileInputFormat extends TestCase{
       fileSplit = (CombineFileSplit) splits[0];
       fileSplit = (CombineFileSplit) splits[0];
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getNumPaths(), 2);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
-      assertEquals(fileSplit.getLocations()[0], "/r2");
+      assertEquals(fileSplit.getLocations()[0], hosts2[0]); // should be on r2
       fileSplit = (CombineFileSplit) splits[1];
       fileSplit = (CombineFileSplit) splits[1];
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getNumPaths(), 1);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
-      assertEquals(fileSplit.getLocations()[0], "/r1");
+      assertEquals(fileSplit.getLocations()[0], hosts1[0]); // should be on r1
       fileSplit = (CombineFileSplit) splits[2];
       fileSplit = (CombineFileSplit) splits[2];
       assertEquals(fileSplit.getNumPaths(), 6);
       assertEquals(fileSplit.getNumPaths(), 6);
       assertEquals(fileSplit.getLocations().length, 1);
       assertEquals(fileSplit.getLocations().length, 1);
-      assertEquals(fileSplit.getLocations()[0], "/r3");
+      assertEquals(fileSplit.getLocations()[0], hosts3[0]); // should be on r3
     } finally {
     } finally {
       if (dfs != null) {
       if (dfs != null) {
         dfs.shutdown();
         dfs.shutdown();

+ 0 - 15
src/test/org/apache/hadoop/metrics2/impl/TestMetricsConfig.java

@@ -109,21 +109,6 @@ public class TestMetricsConfig {
                  t2i1.getString("bar"));
                  t2i1.getString("bar"));
   }
   }
 
 
-  /**
-   * Should throw if missing config files
-   */
-  @Test public void testMissingFiles() {
-    try {
-      MetricsConfig.create("JobTracker");
-    }
-    catch (MetricsConfigException e) {
-      assertTrue("expected the 'cannot locate configuration' exception",
-                 e.getMessage().startsWith("Cannot locate configuration"));
-      return;
-    }
-    fail("should've thrown");
-  }
-
   /**
   /**
    * Test the config file load order
    * Test the config file load order
    * @throws Exception
    * @throws Exception

+ 692 - 0
src/tools/org/apache/hadoop/tools/HadoopArchives.java

@@ -0,0 +1,692 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.HarFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.SequenceFileRecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+
+/**
+ * An archive creation utility.
+ * This class provides methods that can be used
+ * to create hadoop archives. For an understanding of
+ * Hadoop archives, look at {@link HarFileSystem}.
+ */
+public class HadoopArchives implements Tool {
+  private static final Log LOG = LogFactory.getLog(HadoopArchives.class);
+  
+  private static final String NAME = "har"; 
+  static final String SRC_LIST_LABEL = NAME + ".src.list";
+  static final String DST_DIR_LABEL = NAME + ".dest.path";
+  static final String TMP_DIR_LABEL = NAME + ".tmp.dir";
+  static final String JOB_DIR_LABEL = NAME + ".job.dir";
+  static final String SRC_COUNT_LABEL = NAME + ".src.count";
+  static final String TOTAL_SIZE_LABEL = NAME + ".total.size";
+  static final String DST_HAR_LABEL = NAME + ".archive.name";
+  // size of each part file
+  // it's fixed for now.
+  static final long partSize = 2 * 1024 * 1024 * 1024l;
+
+  private static final String usage = "archive"
+  + " -archiveName NAME <src>* <dest>" +
+  "\n";
+  
+ 
+  private JobConf conf;
+
+  public void setConf(Configuration conf) {
+    if (conf instanceof JobConf) {
+      this.conf = (JobConf) conf;
+    } else {
+      this.conf = new JobConf(conf, HadoopArchives.class);
+    }
+  }
+
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  public HadoopArchives(Configuration conf) {
+    setConf(conf);
+  }
+
+  // check the src paths
+  private static void checkPaths(Configuration conf, List<Path> paths) throws
+  IOException {
+    for (Path p : paths) {
+      FileSystem fs = p.getFileSystem(conf);
+      if (!fs.exists(p)) {
+        throw new FileNotFoundException("Source " + p + " does not exist.");
+      }
+    }
+  }
+
+  /**
+   * this assumes that there are two types of entries: file and dir
+   * @param fs the input filesystem
+   * @param p the top level path 
+   * @param out the list of paths output of recursive ls
+   * @throws IOException
+   */
+  private void recursivels(FileSystem fs, Path p, List<FileStatus> out) 
+  throws IOException {
+    FileStatus fstatus = fs.getFileStatus(p);
+    if (!fstatus.isDir()) {
+      out.add(fstatus);
+      return;
+    }
+    else {
+      out.add(fstatus);
+      FileStatus[] listStatus = fs.listStatus(p);
+      for (FileStatus stat: listStatus) {
+        recursivels(fs, stat.getPath(), out);
+      }
+    }
+  }
+
+  /**
+   * Input format of a hadoop archive job responsible for 
+   * generating splits of the file list
+   */
+
+  static class HArchiveInputFormat implements InputFormat<LongWritable, Text> {
+
+    //generate input splits from the src file lists
+    public InputSplit[] getSplits(JobConf jconf, int numSplits)
+    throws IOException {
+      String srcfilelist = jconf.get(SRC_LIST_LABEL, "");
+      if ("".equals(srcfilelist)) {
+          throw new IOException("Unable to get the " +
+              "src file for archive generation.");
+      }
+      long totalSize = jconf.getLong(TOTAL_SIZE_LABEL, -1);
+      if (totalSize == -1) {
+        throw new IOException("Invalid size of files to archive");
+      }
+      //we should be safe since this is set by our own code
+      Path src = new Path(srcfilelist);
+      FileSystem fs = src.getFileSystem(jconf);
+      FileStatus fstatus = fs.getFileStatus(src);
+      ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
+      LongWritable key = new LongWritable();
+      Text value = new Text();
+      SequenceFile.Reader reader = null;
+      // the remaining bytes in the file split
+      long remaining = fstatus.getLen();
+      // the count of sizes calculated till now
+      long currentCount = 0L;
+      // the endposition of the split
+      long lastPos = 0L;
+      // the start position of the split
+      long startPos = 0L;
+      long targetSize = totalSize/numSplits;
+      // create splits of roughly the target size so that all the maps
+      // have equal-sized data to read and write.
+      try {
+        reader = new SequenceFile.Reader(fs, src, jconf);
+        while(reader.next(key, value)) {
+          if (currentCount + key.get() > targetSize && currentCount != 0){
+            long size = lastPos - startPos;
+            splits.add(new FileSplit(src, startPos, size, (String[]) null));
+            remaining = remaining - size;
+            startPos = lastPos;
+            currentCount = 0L;
+          }
+          currentCount += key.get();
+          lastPos = reader.getPosition();
+        }
+        // whatever remains may be smaller than the target size.
+        if (remaining != 0) {
+          splits.add(new FileSplit(src, startPos, remaining, (String[])null));
+        }
+      }
+      finally { 
+        reader.close();
+      }
+      return splits.toArray(new FileSplit[splits.size()]);
+    }
+
+    public RecordReader<LongWritable, Text> getRecordReader(InputSplit split,
+        JobConf job, Reporter reporter) throws IOException {
+      return new SequenceFileRecordReader<LongWritable, Text>(job,
+                 (FileSplit)split);
+    }
+  }
+
+  private boolean checkValidName(String name) {
+    Path tmp = new Path(name);
+    if (tmp.depth() != 1) {
+      return false;
+    }
+    if (name.endsWith(".har")) 
+      return true;
+    return false;
+  }
+  
+
+  private Path largestDepth(List<Path> paths) {
+    Path deepest = paths.get(0);
+    for (Path p: paths) {
+      if (p.depth() > deepest.depth()) {
+        deepest = p;
+      }
+    }
+    return deepest;
+  }
+  
+  // this method is tricky. This method writes
+  // the top level directories in such a way that
+  // the output only contains valid directories in archives.
+  // so for an input path specified by the user 
+  // as /user/hadoop
+  // we need to index 
+  // / as the root 
+  // /user as a directory
+  // /user/hadoop as a directory
+  // so for multiple input paths it makes sure that it
+  // does the right thing.
+  // so if the user specifies the input directories as 
+  // /user/harry and /user/hadoop
+  // we need to write / and user as its child
+  // and /user and harry and hadoop as its children
+  private void writeTopLevelDirs(SequenceFile.Writer srcWriter, 
+      List<Path> paths) throws IOException {
+    //these are qualified paths 
+    List<Path> justDirs = new ArrayList<Path>();
+    for (Path p: paths) {
+      if (!p.getFileSystem(getConf()).isFile(p)) {
+        justDirs.add(new Path(p.toUri().getPath()));
+      }
+      else {
+        justDirs.add(new Path(p.getParent().toUri().getPath()));
+      }
+    }
+    
+    //get the largest depth path
+    // this is tricky
+    TreeMap<String, HashSet<String>> allpaths = new TreeMap<String, HashSet<String>>();
+    Path deepest = largestDepth(paths);
+    Path root = new Path(Path.SEPARATOR);
+    for (int i = 0; i < deepest.depth(); i++) {
+      List<Path> parents = new ArrayList<Path>();
+      for (Path p: justDirs) {
+        if (p.compareTo(root) == 0){
+          // do nothing
+        }
+        else {
+          Path parent = p.getParent();
+          if (null != parent) {
+            if (allpaths.containsKey(parent.toString())) {
+              HashSet<String> children = allpaths.get(parent.toString());
+              children.add(p.getName());
+            } 
+            else {
+              HashSet<String> children = new HashSet<String>();
+              children.add(p.getName());
+              allpaths.put(parent.toString(), children);
+            }
+            parents.add(parent);
+          }
+        }
+      }
+      justDirs = parents;
+    }
+    Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
+    for (Map.Entry<String, HashSet<String>> entry : keyVals) {
+      HashSet<String> children = entry.getValue();
+      String toWrite = entry.getKey() + " dir ";
+      StringBuffer sbuff = new StringBuffer();
+      sbuff.append(toWrite);
+      for (String child: children) {
+        sbuff.append(child + " ");
+      }
+      toWrite = sbuff.toString();
+      srcWriter.append(new LongWritable(0L), new Text(toWrite));
+    }
+  }
+  
+  /**archive the given source paths into
+   * the dest
+   * @param srcPaths the src paths to be archived
+   * @param dest the dest dir that will contain the archive
+   */
+  public void archive(List<Path> srcPaths, String archiveName, Path dest) 
+  throws IOException {
+    checkPaths(conf, srcPaths);
+    int numFiles = 0;
+    long totalSize = 0;
+    conf.set(DST_HAR_LABEL, archiveName);
+    Path outputPath = new Path(dest, archiveName);
+    FileOutputFormat.setOutputPath(conf, outputPath);
+    FileSystem outFs = outputPath.getFileSystem(conf);
+    if (outFs.exists(outputPath) || outFs.isFile(dest)) {
+      throw new IOException("Invalid Output.");
+    }
+    conf.set(DST_DIR_LABEL, outputPath.toString());
+    final String randomId = DistCp.getRandomId();
+    Path jobDirectory = new Path(new JobClient(conf).getSystemDir(),
+                          NAME + "_" + randomId);
+    conf.set(JOB_DIR_LABEL, jobDirectory.toString());
+    //get a tmp directory for input splits
+    FileSystem jobfs = jobDirectory.getFileSystem(conf);
+    jobfs.mkdirs(jobDirectory);
+    Path srcFiles = new Path(jobDirectory, "_har_src_files");
+    conf.set(SRC_LIST_LABEL, srcFiles.toString());
+    SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf,
+        srcFiles, LongWritable.class, Text.class, 
+        SequenceFile.CompressionType.NONE);
+    // get the list of files 
+    // create single list of files and dirs
+    try {
+      // write the top level dirs in first 
+      writeTopLevelDirs(srcWriter, srcPaths);
+      srcWriter.sync();
+      // these are the input paths passed 
+      // from the command line
+      // we do a recursive ls on these paths 
+      // and then write them to the input file 
+      // one at a time
+      for (Path src: srcPaths) {
+        FileSystem fs = src.getFileSystem(conf);
+        ArrayList<FileStatus> allFiles = new ArrayList<FileStatus>();
+        recursivels(fs, src, allFiles);
+        for (FileStatus stat: allFiles) {
+          String toWrite = "";
+          long len = stat.isDir()? 0:stat.getLen();
+          if (stat.isDir()) {
+            toWrite = "" + fs.makeQualified(stat.getPath()) + " dir ";
+            //get the children 
+            FileStatus[] list = fs.listStatus(stat.getPath());
+            StringBuffer sbuff = new StringBuffer();
+            sbuff.append(toWrite);
+            for (FileStatus stats: list) {
+              sbuff.append(stats.getPath().getName() + " ");
+            }
+            toWrite = sbuff.toString();
+          }
+          else {
+            toWrite +=  fs.makeQualified(stat.getPath()) + " file ";
+          }
+          srcWriter.append(new LongWritable(len), new 
+              Text(toWrite));
+          srcWriter.sync();
+          numFiles++;
+          totalSize += len;
+        }
+      }
+    } finally {
+      srcWriter.close();
+    }
+    //increase the replication of src files
+    jobfs.setReplication(srcFiles, (short) 10);
+    conf.setInt(SRC_COUNT_LABEL, numFiles);
+    conf.setLong(TOTAL_SIZE_LABEL, totalSize);
+    int numMaps = (int)(totalSize/partSize);
+    // run at least one map.
+    conf.setNumMapTasks(numMaps == 0? 1:numMaps);
+    conf.setNumReduceTasks(1);
+    conf.setInputFormat(HArchiveInputFormat.class);
+    conf.setOutputFormat(NullOutputFormat.class);
+    conf.setMapperClass(HArchivesMapper.class);
+    conf.setReducerClass(HArchivesReducer.class);
+    conf.setMapOutputKeyClass(IntWritable.class);
+    conf.setMapOutputValueClass(Text.class);
+    conf.set("hadoop.job.history.user.location", "none");
+    FileInputFormat.addInputPath(conf, jobDirectory);
+    //make sure no speculative execution is done
+    conf.setSpeculativeExecution(false);
+    JobClient.runJob(conf);
+    //delete the tmp job directory
+    try {
+      jobfs.delete(jobDirectory, true);
+    } catch(IOException ie) {
+      LOG.info("Unable to clean tmp directory " + jobDirectory);
+    }
+  }
+
+  static class HArchivesMapper 
+  implements Mapper<LongWritable, Text, IntWritable, Text> {
+    private JobConf conf = null;
+    int partId = -1 ; 
+    Path tmpOutputDir = null;
+    Path tmpOutput = null;
+    String partname = null;
+    FSDataOutputStream partStream = null;
+    FileSystem destFs = null;
+    byte[] buffer;
+    int buf_size = 128 * 1024;
+    
+    // configure the mapper and create 
+    // the part file.
+    // use map reduce framework to write into
+    // tmp files. 
+    public void configure(JobConf conf) {
+      this.conf = conf;
+      // this is tightly tied to map reduce
+      // since it does not expose an api 
+      // to get the partition
+      partId = conf.getInt("mapred.task.partition", -1);
+      // create a file name using the partition
+      // we need to write to this directory
+      tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
+      // get the output path and write to the tmp 
+      // directory 
+      partname = "part-" + partId;
+      tmpOutput = new Path(tmpOutputDir, partname);
+      try {
+        destFs = tmpOutput.getFileSystem(conf);
+        //this was a stale copy
+        if (destFs.exists(tmpOutput)) {
+          destFs.delete(tmpOutput, false);
+        }
+        partStream = destFs.create(tmpOutput);
+      } catch(IOException ie) {
+        throw new RuntimeException("Unable to open output file " + tmpOutput);
+      }
+      buffer = new byte[buf_size];
+    }
+
+    // copy raw data.
+    public void copyData(Path input, FSDataInputStream fsin, 
+        FSDataOutputStream fout, Reporter reporter) throws IOException {
+      try {
+        for (int cbread=0; (cbread = fsin.read(buffer))>= 0;) {
+          fout.write(buffer, 0,cbread);
+          reporter.progress();
+        }
+      } finally {
+        fsin.close();
+      }
+    }
+    
+    // the relative path of p, basically
+    // stripping the scheme. Parsing and doing
+    // string manipulation is not good, so
+    // just use the Path API to do it.
+    private Path makeRelative(Path p) {
+      Path retPath = new Path(p.toUri().getPath());
+      return retPath;
+    }
+    
+    static class MapStat {
+      private String pathname;
+      private boolean isDir;
+      private List<String> children;
+      public MapStat(String line) {
+        String[] splits = line.split(" ");
+        pathname = splits[0];
+        if ("dir".equals(splits[1])) {
+          isDir = true;
+        }
+        else {
+          isDir = false;
+        }
+        if (isDir) {
+          children = new ArrayList<String>();
+          for (int i = 2; i < splits.length; i++) {
+            children.add(splits[i]);
+          }
+        }
+      }
+    }
+    // read files from the split input 
+    // and write it onto the part files.
+    // also output hash(name) and string 
+    // for reducer to create index 
+    // and masterindex files.
+    public void map(LongWritable key, Text value,
+        OutputCollector<IntWritable, Text> out,
+        Reporter reporter) throws IOException {
+      String line  = value.toString();
+      MapStat mstat = new MapStat(line);
+      Path srcPath = new Path(mstat.pathname);
+      String towrite = null;
+      Path relPath = makeRelative(srcPath);
+      int hash = HarFileSystem.getHarHash(relPath);
+      long startPos = partStream.getPos();
+      if (mstat.isDir) { 
+        towrite = relPath.toString() + " " + "dir none " + 0 + " " + 0 + " ";
+        StringBuffer sbuff = new StringBuffer();
+        sbuff.append(towrite);
+        for (String child: mstat.children) {
+          sbuff.append(child + " ");
+        }
+        towrite = sbuff.toString();
+        //reading directories is also progress
+        reporter.progress();
+      }
+      else {
+        FileSystem srcFs = srcPath.getFileSystem(conf);
+        FileStatus srcStatus = srcFs.getFileStatus(srcPath);
+        FSDataInputStream input = srcFs.open(srcStatus.getPath());
+        reporter.setStatus("Copying file " + srcStatus.getPath() + 
+            " to archive.");
+        copyData(srcStatus.getPath(), input, partStream, reporter);
+        towrite = relPath.toString() + " file " + partname + " " + startPos
+        + " " + srcStatus.getLen() + " ";
+      }
+      out.collect(new IntWritable(hash), new Text(towrite));
+    }
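+
+    // Illustrative index records collected above (hypothetical paths,
+    // hashes, offsets and lengths):
+    //   hash("/user/foo/dir1")        -> "/user/foo/dir1 dir none 0 0 child1 child2 "
+    //   hash("/user/foo/dir1/child1") -> "/user/foo/dir1/child1 file part-0 0 1024 "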
+    
+    public void close() throws IOException {
+      // close the part files.
+      partStream.close();
+    }
+  }
+  
+  /** the reducer for creating the index and the master index files. */
+  static class HArchivesReducer implements Reducer<IntWritable, 
+  Text, Text, Text> {
+    private JobConf conf = null;
+    private long startIndex = 0;
+    private long endIndex = 0;
+    private long startPos = 0;
+    private Path masterIndex = null;
+    private Path index = null;
+    private FileSystem fs = null;
+    private FSDataOutputStream outStream = null;
+    private FSDataOutputStream indexStream = null;
+    private int numIndexes = 1000;
+    private Path tmpOutputDir = null;
+    private int written = 0;
+    private int keyVal = 0;
+    
+    // configure 
+    public void configure(JobConf conf) {
+      this.conf = conf;
+      tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
+      masterIndex = new Path(tmpOutputDir, "_masterindex");
+      index = new Path(tmpOutputDir, "_index");
+      try {
+        fs = masterIndex.getFileSystem(conf);
+        if (fs.exists(masterIndex)) {
+          fs.delete(masterIndex, false);
+        }
+        if (fs.exists(index)) {
+          fs.delete(index, false);
+        }
+        indexStream = fs.create(index);
+        outStream = fs.create(masterIndex);
+        String version = HarFileSystem.VERSION + " \n";
+        outStream.write(version.getBytes());
+        
+      } catch(IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    
+    // create the index and master index. The input to the reduce is
+    // already sorted by the hash of the files, so we just need to
+    // write it to the index. We add a masterindex entry after every
+    // numIndexes entries written to the index.
+    public void reduce(IntWritable key, Iterator<Text> values,
+        OutputCollector<Text, Text> out,
+        Reporter reporter) throws IOException {
+      keyVal = key.get();
+      while(values.hasNext()) {
+        Text value = values.next();
+        String towrite = value.toString() + "\n";
+        indexStream.write(towrite.getBytes());
+        written++;
+        if (written > numIndexes - 1) {
+          // after every numIndexes index entries, report progress and
+          // add a masterindex entry
+          reporter.setStatus("Creating index for archives");
+          reporter.progress();
+          endIndex = keyVal;
+          String masterWrite = startIndex + " " + endIndex + " " + startPos 
+                              +  " " + indexStream.getPos() + " \n" ;
+          outStream.write(masterWrite.getBytes());
+          startPos = indexStream.getPos();
+          startIndex = endIndex;
+          written = 0;
+        }
+      }
+    }
+    
+    public void close() throws IOException {
+      //write the last part of the master index.
+      if (written > 0) {
+        String masterWrite = startIndex + " " + keyVal + " " + startPos  +
+                             " " + indexStream.getPos() + " \n";
+        outStream.write(masterWrite.getBytes());
+      }
+      // close the streams
+      outStream.close();
+      indexStream.close();
+      // try to increase the replication of the index files
+      fs.setReplication(index, (short) 10);
+      fs.setReplication(masterIndex, (short) 10);
+    }
+    
+  }
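+
+  // Illustrative layout of the two files written by the reducer
+  // (hypothetical hash values and byte offsets):
+  //   _masterindex:  "<HarFileSystem.VERSION> \n" followed by one
+  //                  "<startHash> <endHash> <startPos> <endPos> \n" line
+  //                  per block of numIndexes index entries
+  //   _index:        the map output records, one per line, sorted by hash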
+  
+  /** the main driver for creating the archives.
+   *  It takes at least two command line parameters: the src and the
+   *  dest. It does an lsr on the source paths.
+   *  The mapper creates the archive part files and the reducer creates
+   *  the archive index.
+   */
+
+  public int run(String[] args) throws Exception {
+    try {
+      List<Path> srcPaths = new ArrayList<Path>();
+      Path destPath = null;
+      // parse and validate the arguments: the archive name,
+      // the source paths and the destination
+      String archiveName = null;
+      if (args.length < 4) {
+        System.out.println(usage);
+        throw new IOException("Invalid usage.");
+      }
+      if (!"-archiveName".equals(args[0])) {
+        System.out.println(usage);
+        throw new IOException("Archive Name not specified.");
+      }
+      archiveName = args[1];
+      if (!checkValidName(archiveName)) {
+        System.out.println(usage);
+        throw new IOException("Invalid name for archives. " + archiveName);
+      }
+      for (int i = 2; i < args.length; i++) {
+        if (i == (args.length - 1)) {
+          destPath = new Path(args[i]);
+        }
+        else {
+          srcPaths.add(new Path(args[i]));
+        }
+      }
+      if (srcPaths.size() == 0) {
+        System.out.println(usage);
+        throw new IOException("Invalid Usage: No input sources specified.");
+      }
+      // do a glob on the srcPaths and then pass it on
+      List<Path> globPaths = new ArrayList<Path>();
+      for (Path p: srcPaths) {
+        FileSystem fs = p.getFileSystem(getConf());
+        FileStatus[] statuses = fs.globStatus(p);
+        for (FileStatus status: statuses) {
+          globPaths.add(fs.makeQualified(status.getPath()));
+        }
+      }
+      archive(globPaths, archiveName, destPath);
+    } catch(IOException ie) {
+      System.err.println(ie.getLocalizedMessage());
+      return -1;
+    }
+    return 0;
+  }
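+
+  // An illustrative invocation matching the argument parsing above
+  // (hypothetical paths; typically run via the "hadoop archive" command):
+  //   hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/out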
+
+  /** the main function */
+  public static void main(String[] args) {
+    JobConf job = new JobConf(HadoopArchives.class);
+    HadoopArchives harchives = new HadoopArchives(job);
+    int ret = 0;
+
+    try{
+      ret = ToolRunner.run(harchives, args);
+    } catch(Exception e) {
+      LOG.debug("Exception in archives  ", e);
+      System.err.println("Exception in archives");
+      System.err.println(e.getLocalizedMessage());
+      System.exit(1);
+    }
+    System.exit(ret);
+  }
+}