浏览代码

HADOOP-3992 Add a synthetic load generation facility to the test directory. Contributed by Hairong Kuang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@694458 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 16 年之前
父节点
当前提交
573f384bcf

+ 3 - 0
CHANGES.txt

@@ -148,6 +148,9 @@ Trunk (unreleased changes)
     HADOOP-3445. Add capacity scheduler that provides guaranteed capacities to 
     queues as a percentage of the cluster. (Vivek Ratan via omalley)
 
+    HADOOP-3992. Add a synthetic load generation facility to the test
+    directory. (hairong via szetszwo)
+
   IMPROVEMENTS
 
     HADOOP-3908. Fuse-dfs: better error message if llibhdfs.so doesn't exist.

+ 494 - 0
docs/SLG_user_guide.html

@@ -0,0 +1,494 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="pelt">
+<title> Synthetic Load Generator User Guide </title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://hadoop.apache.org/">Hadoop</a> &gt; <a href="http://hadoop.apache.org/core/">Core</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://hadoop.apache.org/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Apache Hadoop"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/core-logo.gif" title="Scalable Computing Platform"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://www.google.com/search" method="get" class="roundtopsmall">
+<input value="hadoop.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li>
+<a class="unselected" href="http://hadoop.apache.org/core/">Project</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/hadoop">Wiki</a>
+</li>
+<li class="current">
+<a class="selected" href="index.html">Hadoop 0.19 Documentation</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Documentation</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menuitem">
+<a href="index.html">Overview</a>
+</div>
+<div class="menuitem">
+<a href="quickstart.html">Quickstart</a>
+</div>
+<div class="menuitem">
+<a href="cluster_setup.html">Cluster Setup</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_design.html">HDFS Architecture</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_user_guide.html">HDFS User Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_permissions_guide.html">HDFS Permissions Guide</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_quota_admin_guide.html">HDFS Quotas Administrator Guide</a>
+</div>
+<div class="menuitem">
+<a href="commands_manual.html">Commands Manual</a>
+</div>
+<div class="menuitem">
+<a href="hdfs_shell.html">FS Shell Guide</a>
+</div>
+<div class="menupage">
+<div class="menupagetitle">Synthetic Load Generator User Guide</div>
+</div>
+<div class="menuitem">
+<a href="distcp.html">DistCp Guide</a>
+</div>
+<div class="menuitem">
+<a href="mapred_tutorial.html">Map-Reduce Tutorial</a>
+</div>
+<div class="menuitem">
+<a href="native_libraries.html">Native Hadoop Libraries</a>
+</div>
+<div class="menuitem">
+<a href="streaming.html">Streaming</a>
+</div>
+<div class="menuitem">
+<a href="hadoop_archives.html">Hadoop Archives</a>
+</div>
+<div class="menuitem">
+<a href="hod.html">Hadoop On Demand</a>
+</div>
+<div class="menuitem">
+<a href="api/index.html">API Docs</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://hadoop.apache.org/core/mailing_lists.html">Mailing Lists</a>
+</div>
+<div class="menuitem">
+<a href="releasenotes.html">Release Notes</a>
+</div>
+<div class="menuitem">
+<a href="changes.html">All Changes</a>
+</div>
+</div>
+<div id="credit"></div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="SLG_user_guide.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1> Synthetic Load Generator User Guide </h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#Description"> Description </a>
+</li>
+<li>
+<a href="#Synopsis"> Synopsis </a>
+</li>
+<li>
+<a href="#Test+Space+Population"> Test Space Population </a>
+<ul class="minitoc">
+<li>
+<a href="#Structure+Generator"> Structure Generator </a>
+</li>
+<li>
+<a href="#Test+Space+Generator"> Test Space Generator </a>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+		
+<a name="N1000D"></a><a name="Description"></a>
+<h2 class="h3"> Description </h2>
+<div class="section">
+<p>
+        The synthetic load generator is a tool for testing NameNode behavior
+        under different client loads. The user can generate different mixes 
+        of read, write, and list requests by specifying the probabilities of
+        read and write. The user controls the intensity of the load by adjusting
+        parameters for the number of worker threads and the delay between 
+        operations. While load generators are running, the user can profile and
+        monitor the running of the NameNode. When a load generator exits, it
+        prints some NameNode statistics like the average execution time of each
+        kind of operation and the NameNode throughput.
+                       </p>
+</div>
+		
+<a name="N10017"></a><a name="Synopsis"></a>
+<h2 class="h3"> Synopsis </h2>
+<div class="section">
+<p>
+        
+<span class="codefrag">java LoadGenerator [options]</span>
+<br>
+                        
+</p>
+<p>
+        Options include:<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-readProbability &lt;read probability&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the probability of the read operation;
+                default is 0.3333. </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-writeProbability &lt;write probability&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the probability of the write 
+                operations; default is 0.3333.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-root &lt;test space root&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the root of the test space;
+                default is /testLoadSpace.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-maxDelayBetweenOps 
+                &lt;maxDelayBetweenOpsInMillis&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the maximum delay between two consecutive
+                operations in a thread; default is 0 indicating no delay.
+                </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-numOfThreads &lt;numOfThreads&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the number of threads to spawn; 
+                default is 200.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-elapsedTime &lt;elapsedTimeInSecs&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the number of seconds that the program 
+                will run; A value of zero indicates that the program runs
+                forever. The default value is 0.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-startTime &lt;startTimeInMillis&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the time that all worker threads 
+                start to run. By default it is 10 seconds after the main 
+                program starts running.This creates a barrier if more than
+                one load generator is running.
+        </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-seed &lt;seed&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the random generator seed for repeating 
+                requests to NameNode when running with a single thread;
+                default is the current time.</span>
+<br>
+			
+</p>
+<p>
+        After command line argument parsing, the load generator traverses 
+        the test space and builds a table of all directories and another table
+        of all files in the test space. It then waits until the start time to
+        spawn the number of worker threads as specified by the user. Each
+        thread sends a stream of requests to NameNode. At each iteration, 
+        it first decides if it is going to read a file, create a file, or
+        list a directory following the read and write probabilities specified
+        by the user. The listing probability is equal to 
+        <em>1-read probability-write probability</em>. When reading, 
+        it randomly picks a file in the test space and reads the entire file. 
+        When writing, it randomly picks a directory in the test space and 
+        creates a file there. To avoid two threads with the same load 
+        generator or from two different load generators create the same 
+        file, the file name consists of the current machine's host name 
+        and the thread id. The length of the file follows Gaussian 
+        distribution with an average size of 2 blocks and the standard 
+        deviation of 1. The new file is filled with byte 'a'. To avoid
+        the test space to grow indefinitely, the file is deleted immediately
+        after the file creation completes. While listing, it randomly 
+        picks a directory in the test space and lists its content. 
+        After an operation completes, the thread pauses for a random 
+        amount of time in the range of [0, maxDelayBetweenOps] if the 
+        specified maximum delay is not zero. All threads are stopped when 
+        the specified elapsed time is passed. Before exiting, the program 
+        prints the average execution for each kind of NameNode operations, 
+        and the number of requests served by the NameNode per second.
+                        </p>
+</div>
+                
+<a name="N10070"></a><a name="Test+Space+Population"></a>
+<h2 class="h3"> Test Space Population </h2>
+<div class="section">
+<p>
+        The user needs to populate a test space before she runs a 
+        load generator. The structure generator generates a random 
+        test space structure and the data generator creates the files 
+        and directories of the test space in Hadoop distributed file system.
+                        </p>
+<a name="N10079"></a><a name="Structure+Generator"></a>
+<h3 class="h4"> Structure Generator </h3>
+<p>
+        This tool generates a random namespace structure with the 
+        following constraints:
+                                </p>
+<ol>
+        
+<li>The number of subdirectories that a directory can have is 
+            a random number in [minWidth, maxWidth].</li>
+        
+<li>The maximum depth of each subdirectory is a random number 
+            [2*maxDepth/3, maxDepth].</li>
+        
+<li>Files are randomly placed in leaf directories. The size of 
+            each file follows Gaussian distribution with an average size 
+            of 1 block and a standard deviation of 1.</li>
+                                        
+</ol>
+<p>
+        The generated namespace structure is described by two files in 
+        the output directory. Each line of the first file contains the 
+        full name of a leaf directory. Each line of the second file 
+        contains the full name of a file and its size, separated by a blank.
+                                </p>
+<p>
+        The synopsis of the command is
+                                </p>
+<p>
+        
+<span class="codefrag">java StructureGenerator [options]</span>
+                                
+</p>
+<p>
+        Options include:<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-maxDepth &lt;maxDepth&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;maximum depth of the directory tree; 
+                default is 5.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-minWidth &lt;minWidth&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;minimum number of subdirectories per 
+                directories; default is 1.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-maxWidth &lt;maxWidth&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;maximum number of subdirectories per 
+                directories; default is 5.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-numOfFiles &lt;#OfFiles&gt;</span>
+<br> 
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the total number of files in the test 
+                space; default is 10.</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-avgFileSize &lt;avgFileSizeInBlocks&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;average size of blocks; default is 1.
+                </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-outDir &lt;outDir&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;output directory; default is the 
+                current directory. </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-seed &lt;seed&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;random number generator seed; 
+                default is the current time.</span>
+<br>
+                                
+</p>
+<a name="N100D8"></a><a name="Test+Space+Generator"></a>
+<h3 class="h4"> Test Space Generator </h3>
+<p>
+        This tool reads the directory structure and file structure from 
+        the input directory and creates the namespace in Hadoop distributed
+        file system. All files are filled with byte 'a'.
+                                </p>
+<p>
+        The synopsis of the command is
+                                </p>
+<p>
+        
+<span class="codefrag">java DataGenerator [options]</span>
+                                
+</p>
+<p>
+        Options include:<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-inDir &lt;inDir&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;input directory name where directory/file
+                structures are stored; default is the current directory.
+        </span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;-root &lt;test space root&gt;</span>
+<br>
+        
+<span class="codefrag">&nbsp;&nbsp;&nbsp;&nbsp;the name of the root directory which the 
+                new namespace is going to be placed under; 
+                default is "/testLoadSpace".</span>
+<br>
+                                
+</p>
+</div>
+	
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2008 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

文件差异内容过多而无法显示
+ 100 - 0
docs/SLG_user_guide.pdf


+ 40 - 9
docs/changes.html

@@ -94,7 +94,7 @@ to a separate task.<br />(Amareshwari Sriramadasu via ddas)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('trunk_(unreleased_changes)_._new_features_')">  NEW FEATURES
-</a>&nbsp;&nbsp;&nbsp;(23)
+</a>&nbsp;&nbsp;&nbsp;(29)
     <ol id="trunk_(unreleased_changes)_._new_features_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3341">HADOOP-3341</a>. Allow streaming jobs to specify the field separator for map
 and reduce input and output. The new configuration values are:
@@ -147,10 +147,20 @@ query processing tool that uses map/reduce.<br />(Ashish Thusoo via omalley)</li
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3866">HADOOP-3866</a>. Added sort and multi-job updates in the JobTracker web ui.<br />(Craig Weisenfluh via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3698">HADOOP-3698</a>. Add access control to control who is allowed to submit or
 modify jobs in the JobTracker.<br />(Hemanth Yamijala via omalley)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-1869">HADOOP-1869</a>. Support access times for HDFS files.<br />(dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3941">HADOOP-3941</a>. Extend FileSystem API to return file-checksums.<br />(szetszwo)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3581">HADOOP-3581</a>. Prevents memory intensive user tasks from taking down
+nodes.<br />(Vinod K V via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3970">HADOOP-3970</a>. Provides a way to recover counters written to JobHistory.<br />(Amar Kamat via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3702">HADOOP-3702</a>. Adds ChainMapper and ChainReducer classes allow composing
+chains of Maps and Reduces in a single Map/Reduce job, something like
+MAP+ / REDUCE MAP*.<br />(Alejandro Abdelnur via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3445">HADOOP-3445</a>. Add capacity scheduler that provides guaranteed capacities to
+queues as a percentage of the cluster.<br />(Vivek Ratan via omalley)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('trunk_(unreleased_changes)_._improvements_')">  IMPROVEMENTS
-</a>&nbsp;&nbsp;&nbsp;(48)
+</a>&nbsp;&nbsp;&nbsp;(49)
     <ol id="trunk_(unreleased_changes)_._improvements_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3908">HADOOP-3908</a>. Fuse-dfs: better error message if llibhdfs.so doesn't exist.<br />(Pete Wyckoff through zshao)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3732">HADOOP-3732</a>. Delay intialization of datanode block verification till
@@ -219,7 +229,6 @@ error messages.<br />(Steve Loughran via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3844">HADOOP-3844</a>. Include message of local exception in RPC client failures.<br />(Steve Loughran via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3935">HADOOP-3935</a>. Split out inner classes from DataNode.java.<br />(johan)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3905">HADOOP-3905</a>. Create generic interfaces for edit log streams.<br />(shv)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3934">HADOOP-3934</a>. Upgrade log4j to 1.2.15.<br />(omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3062">HADOOP-3062</a>. Add metrics to DataNode and TaskTracker to record network
 traffic for HDFS reads/writes and MR shuffling.<br />(cdouglas)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3742">HADOOP-3742</a>. Remove HDFS from public java doc and add javadoc-dev for
@@ -238,10 +247,13 @@ characters.<br />(Matei Zaharia via omalley)</li>
 NetworkTopology.pseudoSortByDistance.<br />(hairong via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3498">HADOOP-3498</a>. File globbing alternation should be able to span path
 components.<br />(tomwhite)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3361">HADOOP-3361</a>. Implement renames for NativeS3FileSystem.<br />(Albert Chern via tomwhite)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3605">HADOOP-3605</a>. Make EC2 scripts show an error message if AWS_ACCOUNT_ID is
+unset.<br />(Al Hoang via tomwhite)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('trunk_(unreleased_changes)_._optimizations_')">  OPTIMIZATIONS
-</a>&nbsp;&nbsp;&nbsp;(7)
+</a>&nbsp;&nbsp;&nbsp;(8)
     <ol id="trunk_(unreleased_changes)_._optimizations_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3556">HADOOP-3556</a>. Removed lock contention in MD5Hash by changing the
 singleton MessageDigester by an instance per Thread using
@@ -258,10 +270,12 @@ initialized.<br />(acmurthy via omalley)</li>
 versions.<br />(acmurthy via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3769">HADOOP-3769</a>. Make the SampleMapper and SampleReducer from
 GenericMRLoadGenerator public, so they can be used in other contexts.<br />(Lingyun Yang via omalley)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3514">HADOOP-3514</a>. Inline the CRCs in intermediate files as opposed to reading
+it from a different .crc file.<br />(Jothi Padmanabhan via ddas)</li>
     </ol>
   </li>
   <li><a href="javascript:toggleList('trunk_(unreleased_changes)_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(55)
+</a>&nbsp;&nbsp;&nbsp;(60)
     <ol id="trunk_(unreleased_changes)_._bug_fixes_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3563">HADOOP-3563</a>.  Refactor the distributed upgrade code so that it is
 easier to identify datanode and namenode related code.<br />(dhruba)</li>
@@ -320,8 +334,6 @@ dfs. (Tsz Wo (Nicholas) Sze via omalley)
 </li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3773">HADOOP-3773</a>. Change Pipes to set the default map output key and value
 types correctly.<br />(Koji Noguchi via omalley)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3940">HADOOP-3940</a>. Fix in-memory merge condition to wait when there are no map
-outputs or when the final map outputs are being fetched without contention.<br />(cdouglas)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3952">HADOOP-3952</a>. Fix compilation error in TestDataJoin referencing dfs package.<br />(omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3951">HADOOP-3951</a>. Fix package name for FSNamesystem logs and modify other
 hard-coded Logs to use the class name.<br />(cdouglas)</li>
@@ -369,18 +381,35 @@ ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas)
 </li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3954">HADOOP-3954</a>. Disable record skipping by default.<br />(Sharad Agarwal via
 cdouglas)</li>
-      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4050">HADOOP-4050</a>. Fix TestFailScheduler to use absolute paths for the work
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4050">HADOOP-4050</a>. Fix TestFairScheduler to use absolute paths for the work
 directory.<br />(Matei Zaharia via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4069">HADOOP-4069</a>. Keep temporary test files from TestKosmosFileSystem under
 test.build.data instead of /tmp.<br />(lohit via omalley)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4078">HADOOP-4078</a>. Create test files for TestKosmosFileSystem in separate
+directory under test.build.data.<br />(lohit)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3968">HADOOP-3968</a>. Fix getFileBlockLocations calls to use FileStatus instead
+of Path reflecting the new API.<br />(Pete Wyckoff via lohit)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3963">HADOOP-3963</a>. libhdfs does not exit on its own, instead it returns error
+to the caller and behaves as a true library.<br />(Pete Wyckoff via dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4100">HADOOP-4100</a>. Removes the cleanupTask scheduling from the Scheduler
+implementations and moves it to the JobTracker.<br />(Amareshwari Sriramadasu via ddas)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4097">HADOOP-4097</a>. Make hive work well with speculative execution turned on.<br />(Joydeep Sen Sarma via dhruba)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-4113">HADOOP-4113</a>. Changes to libhdfs to not exit on its own, rather return
+an error code to the caller.<br />(Pete Wyckoff via dhruba)</li>
     </ol>
   </li>
 </ul>
 <h2><a href="javascript:toggleList('release_0.18.1_-_unreleased_')">Release 0.18.1 - Unreleased
 </a></h2>
 <ul id="release_0.18.1_-_unreleased_">
+  <li><a href="javascript:toggleList('release_0.18.1_-_unreleased_._improvements_')">  IMPROVEMENTS
+</a>&nbsp;&nbsp;&nbsp;(1)
+    <ol id="release_0.18.1_-_unreleased_._improvements_">
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3934">HADOOP-3934</a>. Upgrade log4j to 1.2.15.<br />(omalley)</li>
+    </ol>
+  </li>
   <li><a href="javascript:toggleList('release_0.18.1_-_unreleased_._bug_fixes_')">  BUG FIXES
-</a>&nbsp;&nbsp;&nbsp;(4)
+</a>&nbsp;&nbsp;&nbsp;(5)
     <ol id="release_0.18.1_-_unreleased_._bug_fixes_">
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-3995">HADOOP-3995</a>. In case of quota failure on HDFS, rename does not restore
 source filename.<br />(rangadi)</li>
@@ -391,6 +420,8 @@ from the TaskTracker, which was causing HDFS client connections to not be
 collected.<br />(ddas via omalley)</li>
       <li><a href="http://issues.apache.org/jira/browse/HADOOP-4046">HADOOP-4046</a>. Made WritableComparable's constructor protected instead of
 private to re-enable class derivation.<br />(cdouglas via omalley)</li>
+      <li><a href="http://issues.apache.org/jira/browse/HADOOP-3940">HADOOP-3940</a>. Fix in-memory merge condition to wait when there are no map
+outputs or when the final map outputs are being fetched without contention.<br />(cdouglas)</li>
     </ol>
   </li>
 </ul>

+ 3 - 0
docs/cluster_setup.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/commands_manual.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/distcp.html

@@ -131,6 +131,9 @@ document.write("Last Published: " + document.lastModified);
 <div class="menuitem">
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
+<div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
 <div class="menupage">
 <div class="menupagetitle">DistCp Guide</div>
 </div>

+ 15 - 0
docs/hadoop-default.html

@@ -352,6 +352,12 @@ creations/deletions), or "all".</td>
 <td><a name="dfs.replication.interval">dfs.replication.interval</a></td><td>3</td><td>The periodicity in seconds with which the namenode computes repliaction work for datanodes. </td>
 </tr>
 <tr>
+<td><a name="dfs.access.time.precision">dfs.access.time.precision</a></td><td>3600000</td><td>The access time for HDFS file is precise upto this value. 
+               The default value is 1 hour. Setting a value of 0 disables
+               access times for HDFS.
+  </td>
+</tr>
+<tr>
 <td><a name="fs.s3.block.size">fs.s3.block.size</a></td><td>67108864</td><td>Block size to use when writing files to S3.</td>
 </tr>
 <tr>
@@ -438,6 +444,15 @@ creations/deletions), or "all".</td>
   </td>
 </tr>
 <tr>
+<td><a name="mapred.tasktracker.taskmemorymanager.monitoring-interval">mapred.tasktracker.taskmemorymanager.monitoring-interval</a></td><td>5000</td><td>The interval, in milliseconds, for which the tasktracker waits
+   between two cycles of monitoring its tasks' memory usage.</td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill">mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</a></td><td>5000</td><td>The time, in milliseconds, the tasktracker waits for sending a
+  SIGKILL to a process that has overrun memory limits, after it has been sent
+  a SIGTERM.</td>
+</tr>
+<tr>
 <td><a name="mapred.map.tasks">mapred.map.tasks</a></td><td>2</td><td>The default number of map tasks per job.  Typically set
   to a prime several times greater than number of available hosts.
   Ignored when mapred.job.tracker is "local".  

+ 3 - 0
docs/hadoop_archives.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_design.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_permissions_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_quota_admin_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_shell.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <div class="menupagetitle">FS Shell Guide</div>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hdfs_user_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_admin_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_config_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/hod_user_guide.html

@@ -134,6 +134,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/index.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 9 - 0
docs/linkmap.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">
@@ -258,6 +261,12 @@ document.write("Last Published: " + document.lastModified);
 </li>
 </ul>
     
+<ul>
+<li>
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>fs</em>
+</li>
+</ul>
+    
 <ul>
 <li>
 <a href="distcp.html">DistCp Guide</a>&nbsp;&nbsp;___________________&nbsp;&nbsp;<em>distcp</em>

+ 16 - 16
docs/linkmap.pdf

@@ -5,10 +5,10 @@
 /Producer (FOP 0.20.5) >>
 endobj
 5 0 obj
-<< /Length 1173 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1192 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-Gatn'968Q9&BF88'L%gML%Vg$$n:9m'IX:*!>n;Y9!,'7W@b$c[n!l!//<go:>;1;P/mJ"Bsk",ojY7PIYe.p5T"=GMm35J0#HOY!J=5K_BA*`(^U4TENgmEY@hXM`3\AA7Bq;^p.eYt-/X[a)ud1JT%D`%]rES/.b-]j^'C.:>+#R+).c<u7M0#O0liag[_3"$5<6()Hu(tXR#[:cD3^$aPLeiu&BYo*V;:03Ago0%W`;c[Wicrl^^`No1]^2G_#=cdqObmHY]gId+Wu8HqjM/?h9Ue0KCFr:Igu\9?Ns@16'<ABUm[u"!jNeu5_)0+q]%pYq27<$9"f+Iah!q!L']bO[SU#&rsK%Pq>lKG?OtkG]kI+;gs;jl(gtEiNV[%)MC_.<p5iR(1;.N>Dq'.mC-6+/gf\5:kX^ZQ/:'^n._8Z;Adn1::u1KB_`I.1ml?9-YrDp/a;Cn;+CAJ\Y"-??PNFohLFer)RO!EGfN$2&m;rmP6G<3REdhnC^V8uDFoj8sN&"K438?]sO2CiDRSpSrcKt7]W!V$.KsAg?*-K?8q;bF\GI?EGn1lM*-1rn8<W,nR-P6Ft`@f\<o>#A,cb-h?Fe,eiYB\Uf@Q('3$"N%2V?`54;AOW.ch@)o(Idjn;&pq_'B3aKj&M9;99`c[eaCNZO`i0p"Z*9B:@Dh?#l]<@?LckI-]rPNFl_1s3ah%ubMVc%`7<D9TgiSAX>':DgO!5mpD2rN>k66k-\"%eOeSBUN[V\HgjqEt*$rt$?Ye0tXuD!&;p'!M:l\(.>+K-O_;d+\O.81o[/G@o@X[7S&@n8-@!%9*T7+?um1JT<C2@75dN"Ka"tVXU[X)7%Z07R"1McgVNQ!gr.d&EJ)U;TJ(uNGL3_6oooL7WZfg'sl]@QEUEt^YiU#$Ko>[F%Wiu$\E]4i0h8ps9N`FWK+ortj2CAn9(iD<c!`PP`IJu"Mf+$PNJnCH;(hq..H]T(#%e_qo7U8X$u.8Cl['>^j"X.36M-VuS3b)EiVkoZD4CG.W)]A^2+q"08K"<nbG5s5@,#)rnsMsJ??rg!TLkMY<b+\!';eg1Fb5BReP&e&GaKN@d6=!MX(2-qAP6PPPDUsGFZX[@!^3@oA:[JT99X6t7CNEk%?PmuWW^j=6bcZBBYf'3WkY2>-GN)T`cL]n7U~>
+Gatn'9lJcG&;KZQ'fr4IE;?JoURQ)R>r6CsHa/@n@_kZY9LKL6M/psm^Qb*hU^L4Xli]jM$C^_JE/#A&m<\<W*JlHR0NrB1YlC%C@.^ro&N%6Nb5/B169sNe>$'*9="C8hS*1b<WHiOW6.SgHhi._RriL:6X3K;2;7.1IDVmk^7dE9u15c^Pb+9SG&eIEFjjmYhp*NoAjN;?2Z2%""4''e<oi@$B>ZJC6BP4-+DNED'USJp0<]=<e]1.;Q3sr3u$tSTQqcAa00.)TbmhQ[+5_%#O4YN&bkJH."r85gb#D;PB#'#5[Q=8[HSRWK<a#sPo!(.:o)Bh3<>EsDt=Z<L[Wop)g8h?D]<2f^r_1:NoJ78%.JG*\1R_9CgZ`+cB"NA*03Z+D3K,CBbZA23;:T`c=T$PEXa'=mBa6WpL:!&bk7s$\$f4r]l!PT2XRiQ[jKO+%0:U.;CV]]J17d:THqqJa58@WL,Xbl*^$YP*()2N"lX)<j?F:_?mK)+AHrmeT/IFlU#$sX-K(Am*hiV#ik'(0D[<3g@8B"&SNVSp+M;kN#*C)@3IaesUOPi`JFZD_:cbq@8K\s2r'&d*_aTB`4tO>I8VSuO8]c8b1b]^C#)S'nr;#kg.IK?+a@S-IHgp%N2Ob48G-SRd7Il>*$O<LO(iE:pn&-bYiAaS8&@k;7Z9C2jq-(JIeCS=c3;oajoedZ>r0;qH]oc0f%)a!ODf?a)3kq=GFs&.E144l9Op&Oi$J<ReT9DO0HRFMCCG?e6!:N@6,VEJB(5pX.JDH\_2hCAeDTq5rV8'^]^UbQ'BW*lP&9Aq<:*\C-ApAq,.iVip9<#Z%6:X\Q-h+t7d1*BA+!V>'S085'Ui&GA!W_<;R^U#BMMM"FV/[VnTGC]Be'b:L!#^/ZO33hjI7>(6Igi>_(C\VlN(1=?ir4#olf1)A*1^ZCZL%Fji4^@di/g_>f0O+V)qo&37^fD((b:XcNnQk]:"Q8W"Z>>)$[044j:o?pPgXi2_[Op:O:M^SaISc/&QdV%WX6AH(#:U(LCD2f%orOuT#%sJE!LGFGf86A7bE<n7V)A-g%%u[pqSHLjs_<[X[MA+.&.ga^T(r\&K#[iKh\l$*.NI9OHnZq+(An^O>;is+b'O0aT@_(O0gL$q>Ic68BEi6L[(qtHMo9q7eChM+4S\?es`Ej6dSGN7`',dn~>
 endstream
 endobj
 6 0 obj
@@ -20,10 +20,10 @@ endobj
 >>
 endobj
 7 0 obj
-<< /Length 316 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 388 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-Gaqcq4\rsL&;GE/MAt"F@peTV*be%J_uQer?uW*/7\k#p%#BCo'3Z]EVrr0rIX#AEm1)E\!Oa*Z9#?.Ua$ChGNB&DY;iV(K).'Us(&\JW:ap//)BjI/RZFdj(Gk@eol;Ps$f$I`qBkndGQ[h.mOe5SXUV&IR45-]#WVm[.&$6/OCtj:U@N*LCJ:/UjZ3VkW)f7L:Tdt!j2G`2ap6arCMKc#S2KG,;R;[^74QO]V7*)rmAf^k7m3D<ZWEB=,%OACA5@Z+]@O[AGoqq=j1<oS(VZ?5McFHSm&[?;(qoeaqqcPSr^`s%qZFM"B>a~>
+Gar?-5uWCi&;BTNMESBQA$TX/E,7W0i-<Tt2sYQp)^o2(Q(j<=h>jY\L"#d(p[-_+VpXmp5To*KAeBe\'#=Gr5c@^_UmU-SdN^kMeqc/:)^1%7:QsJji!Fa4ltr0&ZPSFA_WLkSKbm;K]qbFH]Xl(>Qr7tk\%VLJPT^kj_^^Ma%*"r/bQK8@eZ*:f4."MLd0a=Q:m>lgHQ$UtI6EPhWj33hWj6VUF#C^*U\AXh8K1EDbqt"<U2u%l[=^#VMWX:i`SWDlNu+9&'$6*H9_iAnYs/FI"i&j1VpggGWt;O(&K'a7Mn[`93doUK^2LT3VHs0>8IU9BRt9#$)dmipgQ5K:epQ.3:mTX%\[,"-Pq4?L:Z"P&I1Fo#?2a#2CtRPS]SMSi~>
 endstream
 endobj
 8 0 obj
@@ -87,19 +87,19 @@ endobj
 xref
 0 14
 0000000000 65535 f 
-0000002512 00000 n 
-0000002576 00000 n 
-0000002626 00000 n 
+0000002603 00000 n 
+0000002667 00000 n 
+0000002717 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
-0000001336 00000 n 
-0000001442 00000 n 
-0000001849 00000 n 
-0000001955 00000 n 
-0000002067 00000 n 
-0000002177 00000 n 
-0000002288 00000 n 
-0000002396 00000 n 
+0000001355 00000 n 
+0000001461 00000 n 
+0000001940 00000 n 
+0000002046 00000 n 
+0000002158 00000 n 
+0000002268 00000 n 
+0000002379 00000 n 
+0000002487 00000 n 
 trailer
 <<
 /Size 14
@@ -107,5 +107,5 @@ trailer
 /Info 4 0 R
 >>
 startxref
-2748
+2839
 %%EOF

+ 3 - 0
docs/mapred_tutorial.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menupage">

+ 3 - 0
docs/native_libraries.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/quickstart.html

@@ -132,6 +132,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 3 - 0
docs/streaming.html

@@ -135,6 +135,9 @@ document.write("Last Published: " + document.lastModified);
 <a href="hdfs_shell.html">FS Shell Guide</a>
 </div>
 <div class="menuitem">
+<a href="SLG_user_guide.html">Synthetic Load Generator User Guide</a>
+</div>
+<div class="menuitem">
 <a href="distcp.html">DistCp Guide</a>
 </div>
 <div class="menuitem">

+ 192 - 0
src/docs/src/documentation/content/xdocs/SLG_user_guide.xml

@@ -0,0 +1,192 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 2002-2004 The Apache Software Foundation
+  
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+  
+      http://www.apache.org/licenses/LICENSE-2.0
+      
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+	<header>
+		<title> Synthetic Load Generator User Guide </title>
+	</header>
+	<body>
+		<section>
+			<title> Description </title>
+			<p>
+        The synthetic load generator is a tool for testing NameNode behavior
+        under different client loads. The user can generate different mixes 
+        of read, write, and list requests by specifying the probabilities of
+        read and write. The user controls the intensity of the load by adjusting
+        parameters for the number of worker threads and the delay between 
+        operations. While load generators are running, the user can profile and
+        monitor the running of the NameNode. When a load generator exits, it
+        prints some NameNode statistics like the average execution time of each
+        kind of operation and the NameNode throughput.
+                       </p>
+                </section>
+		<section>
+			<title> Synopsis </title>
+			<p>
+        <code>java LoadGenerator [options]</code><br/>
+                        </p>
+                        <p>
+        Options include:<br/>
+        <code>&nbsp;&nbsp;-readProbability &lt;read probability&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the probability of the read operation;
+                default is 0.3333. </code><br/>
+        <code>&nbsp;&nbsp;-writeProbability &lt;write probability&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the probability of the write 
+                operations; default is 0.3333.</code><br/>
+        <code>&nbsp;&nbsp;-root &lt;test space root&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the root of the test space;
+                default is /testLoadSpace.</code><br/>
+        <code>&nbsp;&nbsp;-maxDelayBetweenOps 
+                &lt;maxDelayBetweenOpsInMillis&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the maximum delay between two consecutive
+                operations in a thread; default is 0 indicating no delay.
+                </code><br/>
+        <code>&nbsp;&nbsp;-numOfThreads &lt;numOfThreads&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the number of threads to spawn; 
+                default is 200.</code><br/>
+        <code>&nbsp;&nbsp;-elapsedTime &lt;elapsedTimeInSecs&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the number of seconds that the program 
+                will run; A value of zero indicates that the program runs
+                forever. The default value is 0.</code><br/>
+        <code>&nbsp;&nbsp;-startTime &lt;startTimeInMillis&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the time that all worker threads 
+                start to run. By default it is 10 seconds after the main 
+                program starts running. This creates a barrier if more than
+                one load generator is running.
+        </code><br/>
+        <code>&nbsp;&nbsp;-seed &lt;seed&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the random generator seed for repeating 
+                requests to NameNode when running with a single thread;
+                default is the current time.</code><br/>
+			</p>
+			<p>
+        After command line argument parsing, the load generator traverses 
+        the test space and builds a table of all directories and another table
+        of all files in the test space. It then waits until the start time to
+        spawn the number of worker threads as specified by the user. Each
+        thread sends a stream of requests to NameNode. At each iteration, 
+        it first decides if it is going to read a file, create a file, or
+        list a directory following the read and write probabilities specified
+        by the user. The listing probability is equal to 
+        <em>1-read probability-write probability</em>. When reading, 
+        it randomly picks a file in the test space and reads the entire file. 
+        When writing, it randomly picks a directory in the test space and 
+        creates a file there. To prevent two threads in the same load 
+        generator, or in two different load generators, from creating the 
+        same file, the file name consists of the current machine's host 
+        name and the thread id. The length of the file follows Gaussian 
+        distribution with an average size of 2 blocks and the standard 
+        deviation of 1. The new file is filled with byte 'a'. To keep
+        the test space from growing indefinitely, the file is deleted
+        immediately after the file creation completes. When listing, it
+        randomly picks a directory in the test space and lists its content. 
+        After an operation completes, the thread pauses for a random 
+        amount of time in the range of [0, maxDelayBetweenOps] if the 
+        specified maximum delay is not zero. All threads are stopped when 
+        the specified elapsed time has passed. Before exiting, the program 
+        prints the average execution time for each kind of NameNode operation, 
+        and the number of requests served by the NameNode per second.
+                        </p>
+                </section>
+                <section>
+                        <title> Test Space Population </title>
+                        <p>
+        The user needs to populate a test space before she runs a 
+        load generator. The structure generator generates a random 
+        test space structure and the data generator creates the files 
+        and directories of the test space in Hadoop distributed file system.
+                        </p>
+                        <section>
+                                <title> Structure Generator </title>
+                                <p>
+        This tool generates a random namespace structure with the 
+        following constraints:
+                                </p>
+                                        <ol>
+        <li>The number of subdirectories that a directory can have is 
+            a random number in [minWidth, maxWidth].</li>
+        <li>The maximum depth of each subdirectory is a random number 
+            in [2*maxDepth/3, maxDepth].</li>
+        <li>Files are randomly placed in leaf directories. The size of 
+            each file follows Gaussian distribution with an average size 
+            of 1 block and a standard deviation of 1.</li>
+                                        </ol>
+                                <p>
+        The generated namespace structure is described by two files in 
+        the output directory. Each line of the first file contains the 
+        full name of a leaf directory. Each line of the second file 
+        contains the full name of a file and its size, separated by a blank.
+                                </p>
+                                <p>
+        The synopsis of the command is
+                                </p>
+                                <p>
+        <code>java StructureGenerator [options]</code>
+                                </p>
+                                <p>
+        Options include:<br/>
+        <code>&nbsp;&nbsp;-maxDepth &lt;maxDepth&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;maximum depth of the directory tree; 
+                default is 5.</code><br/>
+        <code>&nbsp;&nbsp;-minWidth &lt;minWidth&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;minimum number of subdirectories per 
+                directory; default is 1.</code><br/>
+        <code>&nbsp;&nbsp;-maxWidth &lt;maxWidth&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;maximum number of subdirectories per 
+                directory; default is 5.</code><br/>
+        <code>&nbsp;&nbsp;-numOfFiles &lt;#OfFiles&gt;</code><br/> 
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the total number of files in the test 
+                space; default is 10.</code><br/>
+        <code>&nbsp;&nbsp;-avgFileSize &lt;avgFileSizeInBlocks&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;average file size in blocks; default is 1.
+                </code><br/>
+        <code>&nbsp;&nbsp;-outDir &lt;outDir&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;output directory; default is the 
+                current directory. </code><br/>
+        <code>&nbsp;&nbsp;-seed &lt;seed&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;random number generator seed; 
+                default is the current time.</code><br/>
+                                </p>
+                        </section>
+                        <section>
+                                <title> Test Space Generator </title>
+                                <p>
+        This tool reads the directory structure and file structure from 
+        the input directory and creates the namespace in Hadoop distributed
+        file system. All files are filled with byte 'a'.
+                                </p>
+                                <p>
+        The synopsis of the command is
+                                </p>
+                                <p>
+        <code>java DataGenerator [options]</code>
+                                </p>
+                                <p>
+        Options include:<br/>
+        <code>&nbsp;&nbsp;-inDir &lt;inDir&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;input directory name where directory/file
+                structures are stored; default is the current directory.
+        </code><br/>
+        <code>&nbsp;&nbsp;-root &lt;test space root&gt;</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;the name of the root directory which the 
+                new namespace is going to be placed under; 
+                default is "/testLoadSpace".</code><br/>
+                                </p>
+		        </section>
+                </section>
+	</body>
+</document>

+ 1 - 0
src/docs/src/documentation/content/xdocs/site.xml

@@ -41,6 +41,7 @@ See http://forrest.apache.org/docs/linking.html for more info.
     <hdfs      label="HDFS Quotas Administrator Guide" href="hdfs_quota_admin_guide.html" />
     <commands  label="Commands Manual"     href="commands_manual.html" />
     <fs        label="FS Shell Guide"     href="hdfs_shell.html" />
+    <fs        label="Synthetic Load Generator User Guide"  href="SLG_user_guide.html" />
     <distcp    label="DistCp Guide"       href="distcp.html" />
     <mapred    label="Map-Reduce Tutorial" href="mapred_tutorial.html" />
     <mapred    label="Native Hadoop Libraries" href="native_libraries.html" />

+ 160 - 0
src/test/org/apache/hadoop/fs/loadGenerator/DataGenerator.java

@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program reads the directory structure and file structure from
+ * the input directory and creates the namespace in the file system
+ * specified by the configuration in the specified root.
+ * All the files are filled with 'a'.
+ * 
+ * The synopsis of the command is
+ * java DataGenerator 
+ *   -inDir <inDir>: input directory name where directory/file structures
+ *                   are stored. Its default value is the current directory.
+ *   -root <root>: the name of the root directory which the new namespace 
+ *                 is going to be placed under. 
+ *                 Its default value is "/testLoadSpace".
+ */
+public class DataGenerator extends Configured implements Tool {
+  private File inDir = StructureGenerator.DEFAULT_STRUCTURE_DIRECTORY;
+  private Path root = DEFAULT_ROOT;
+  private FileSystem fs;
+  final static private long BLOCK_SIZE = 10;
+  final static private String USAGE = "java DataGenerator " +
+  		"-inDir <inDir> " +
+  		"-root <root>";
+  
+  /** default name of the root where the test namespace will be placed under */
+  final static Path DEFAULT_ROOT = new Path("/testLoadSpace");
+  
+  /** Main function.
+   * It first parses the command line arguments.
+   * It then reads the directory structure from the input directory 
+   * structure file and creates directory structure in the file system
+   * namespace. Afterwards it reads the file attributes and creates files 
+   * in the file system. All file content is filled with 'a'.
+   */
+  public int run(String[] args) throws Exception {
+    int exitCode = 0;
+    exitCode = init(args);
+    if (exitCode != 0) {
+      return exitCode;
+    }
+    genDirStructure();
+    genFiles();
+    return exitCode;
+  }
+
+  /** Parse the command line arguments and initialize the data */
+  private int init(String[] args) {
+    try { // initialize file system handle
+      fs = FileSystem.get(getConf());
+    } catch (IOException ioe) {
+      System.err.println("Can not initialize the file system: " + 
+          ioe.getLocalizedMessage());
+      return -1;
+    }
+
+    for (int i = 0; i < args.length; i++) { // parse command line
+      if (args[i].equals("-root")) {
+        root = new Path(args[++i]);
+      } else if (args[i].equals("-inDir")) {
+        inDir = new File(args[++i]);
+      } else {
+        System.err.println(USAGE);
+        ToolRunner.printGenericCommandUsage(System.err);
+        System.exit(-1);
+      }
+    }
+    return 0;
+  }
+  
+  /** Read directory structure file under the input directory.
+   * Create each directory under the specified root.
+   * The directory names are relative to the specified root.
+   */
+  private void genDirStructure() throws IOException {
+    BufferedReader in = new BufferedReader(
+        new FileReader(new File(inDir, 
+            StructureGenerator.DIR_STRUCTURE_FILE_NAME)));
+    String line;
+    while ((line=in.readLine()) != null) {
+      fs.mkdirs(new Path(root+line));
+    }
+  }
+
+  /** Read file structure file under the input directory.
+   * Create each file under the specified root.
+   * The file names are relative to the root.
+   */
+  private void genFiles() throws IOException {
+    BufferedReader in = new BufferedReader(
+        new FileReader(new File(inDir, 
+            StructureGenerator.FILE_STRUCTURE_FILE_NAME)));
+    String line;
+    while ((line=in.readLine()) != null) {
+      String[] tokens = line.split(" ");
+      if (tokens.length != 2) {
+        throw new IOException("Expect at most 2 tokens per line: " + line);
+      }
+      String fileName = root+tokens[0];
+      long fileSize = (long)(BLOCK_SIZE*Double.parseDouble(tokens[1]));
+      genFile(new Path(fileName), fileSize);
+    }
+  }
+  
+  /** Create a file with the name <code>file</code> and 
+   * a length of <code>fileSize</code>. The file is filled with character 'a'.
+   */
+  private void genFile(Path file, long fileSize) throws IOException {
+    FSDataOutputStream out = fs.create(file, true, 
+        getConf().getInt("io.file.buffer.size", 4096),
+        (short)getConf().getInt("dfs.replication", 3),
+        fs.getDefaultBlockSize());
+    for(long i=0; i<fileSize; i++) {
+      out.writeByte('a');
+    }
+    out.close();
+  }
+  
+  /** Main program.
+   * 
+   * @param args Command line arguments
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(),
+        new DataGenerator(), args);
+    System.exit(res);
+  }
+}

+ 466 - 0
src/test/org/apache/hadoop/fs/loadGenerator/LoadGenerator.java

@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/** The load generator is a tool for testing NameNode behavior under
+ * different client loads.
+ * It allows the user to generate different mixes of read, write,
+ * and list requests by specifying the probabilities of read and
+ * write. The user controls the intensity of the load by
+ * adjusting parameters for the number of worker threads and the delay
+ * between operations. While load generators are running, the user
+ * can profile and monitor the running of the NameNode. When a load
+ * generator exits, it prints some NameNode statistics like the average
+ * execution time of each kind of operations and the NameNode
+ * throughput.
+ * 
+ * After command line argument parsing and data initialization,
+ * the load generator spawns the number of worker threads 
+ * as specified by the user.
+ * Each thread sends a stream of requests to the NameNode.
+ * For each iteration, it first decides if it is going to read a file,
+ * create a file, or list a directory following the read and write 
+ * probabilities specified by the user.
+ * When reading, it randomly picks a file in the test space and reads
+ * the entire file. When writing, it randomly picks a directory in the
+ * test space and creates a file whose name consists of the current 
+ * machine's host name and the thread id. The length of the file
+ * follows Gaussian distribution with an average size of 2 blocks and
+ * the standard deviation of 1 block. The new file is filled with 'a'.
+ * Immediately after the file creation completes, the file is deleted
+ * from the test space.
+ * While listing, it randomly picks a directory in the test space and
+ * list the directory content.
+ * Between two consecutive operations, the thread pauses for a random
+ * amount of time in the range of [0, maxDelayBetweenOps] 
+ * if the specified max delay is not zero.
+ * All threads are stopped when the specified elapsed time is passed.
+ * Before exiting, the program prints the average execution for 
+ * each kind of NameNode operations, and the number of requests
+ * served by the NameNode.
+ *
+ * The synopsis of the command is
+ * java LoadGenerator
+ *   -readProbability <read probability>: read probability [0, 1]
+ *                                        with a default value of 0.3333. 
+ *   -writeProbability <write probability>: write probability [0, 1]
+ *                                         with a default value of 0.3333.
+ *   -root <root>: test space with a default value of /testLoadSpace
+ *   -maxDelayBetweenOps <maxDelayBetweenOpsInMillis>: 
+ *      Max delay in the unit of milliseconds between two operations with a 
+ *      default value of 0 indicating no delay.
+ *   -numOfThreads <numOfThreads>: 
+ *      number of threads to spawn with a default value of 200.
+ *   -elapsedTime <elapsedTimeInSecs>: 
+ *      the elapsed time of program with a default value of 0 
+ *      indicating running forever
+ *   -startTime <startTimeInMillis> : when the threads start to run.
+ */
+public class LoadGenerator extends Configured implements Tool {
+  private volatile boolean shouldRun = true;
+  private Path root = DataGenerator.DEFAULT_ROOT;
+  private FileSystem fs;
+  private int maxDelayBetweenOps = 0;
+  private int numOfThreads = 200;
+  private double readPr = 0.3333;
+  private double writePr = 0.3333;
+  private long elapsedTime = 0;
+  private long startTime = System.currentTimeMillis()+10000;
+  final static private int BLOCK_SIZE = 10;
+  private ArrayList<String> files = new ArrayList<String>();  // a table of file names
+  private ArrayList<String> dirs = new ArrayList<String>(); // a table of directory names
+  private Random r = null;
+  final private static String USAGE = "java LoadGenerator\n" +
+  	"-readProbability <read probability>\n" +
+    "-writeProbability <write probability>\n" +
+    "-root <root>\n" +
+    "-maxDelayBetweenOps <maxDelayBetweenOpsInMillis>\n" +
+    "-numOfThreads <numOfThreads>\n" +
+    "-elapsedTime <elapsedTimeInSecs>\n" +
+    "-startTime <startTimeInMillis>";
+  final private String hostname;
+  
+  /** Constructor */
+  public LoadGenerator() throws IOException, UnknownHostException {
+    InetAddress addr = InetAddress.getLocalHost();
+    hostname = addr.getHostName();
+  }
+
+  private final static int OPEN = 0;
+  private final static int LIST = 1;
+  private final static int CREATE = 2;
+  private final static int WRITE_CLOSE = 3;
+  private final static int DELETE = 4;
+  private final static int TOTAL_OP_TYPES =5;
+  private long [] executionTime = new long[TOTAL_OP_TYPES];
+  private long [] totalNumOfOps = new long[TOTAL_OP_TYPES];
+  
+  /** A thread sends a stream of requests to the NameNode.
+   * At each iteration, it first decides if it is going to read a file,
+   * create a file, or list a directory following the read
+   * and write probabilities.
+   * When reading, it randomly picks a file in the test space and reads
+   * the entire file. When writing, it randomly picks a directory in the
+   * test space and creates a file whose name consists of the current 
+   * machine's host name and the thread id. The length of the file
+   * follows Gaussian distribution with an average size of 2 blocks and
+   * the standard deviation of 1 block. The new file is filled with 'a'.
+   * Immediately after the file creation completes, the file is deleted
+   * from the test space.
+   * While listing, it randomly picks a directory in the test space and
+   * list the directory content.
+   * Between two consecutive operations, the thread pauses for a random
+   * amount of time in the range of [0, maxDelayBetweenOps] 
+   * if the specified max delay is not zero.
+   * A thread runs for the specified elapsed time if the time isn't zero.
+   * Otherwise, it runs forever.
+   */
+  private class DFSClientThread extends Thread {
+    private int id;
+    private long [] executionTime = new long[TOTAL_OP_TYPES];
+    private long [] totalNumOfOps = new long[TOTAL_OP_TYPES];
+    private byte[] buffer = new byte[1024];
+    
+    private DFSClientThread(int id) {
+      this.id = id;
+    }
+    
+    /** Main loop
+     * Each iteration decides what's the next operation and then pauses.
+     */
+    public void run() {
+      try {
+        while (shouldRun) {
+          nextOp();
+          delay();
+        }
+      } catch (Exception ioe) {
+        System.err.println(ioe.getLocalizedMessage());
+        ioe.printStackTrace();
+      }
+    }
+    
+    /** Let the thread pause for a random amount of time in the range of
+     * [0, maxDelayBetweenOps] if the delay is not zero. Otherwise, no pause.
+     */
+    private void delay() throws InterruptedException {
+      if (maxDelayBetweenOps>0) {
+        int delay = r.nextInt(maxDelayBetweenOps);
+        Thread.sleep(delay);
+      }
+    }
+    
+    /** Perform the next operation. 
+     * 
+     * Depending on the read and write probabilities, the next
+     * operation could be either read, write, or list.
+     */
+    private void nextOp() throws IOException {
+      double rn = r.nextDouble();
+      if (rn < readPr) {
+        read();
+      } else if (rn < readPr+writePr) {
+        write();
+      } else {
+        list();
+      }
+    }
+    
+    /** Read operation randomly picks a file in the test space and reads
+     * the entire file */
+    private void read() throws IOException {
+      String fileName = files.get(r.nextInt(files.size()));
+      long startTime = System.currentTimeMillis();
+      InputStream in = fs.open(new Path(fileName));
+      executionTime[OPEN] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[OPEN]++;
+      while (in.read(buffer) != -1) {}
+      in.close();
+    }
+    
+    /** The write operation randomly picks a directory in the
+     * test space and creates a file whose name consists of the current 
+     * machine's host name and the thread id. The length of the file
+     * follows Gaussian distribution with an average size of 2 blocks and
+     * the standard deviation of 1 block. The new file is filled with 'a'.
+     * Immediately after the file creation completes, the file is deleted
+     * from the test space.
+     */
+    private void write() throws IOException {
+      String dirName = dirs.get(r.nextInt(dirs.size()));
+      Path file = new Path(dirName, hostname+id);
+      double fileSize = 0;
+      while ((fileSize = r.nextGaussian()+2)<=0) {}
+      genFile(file, (long)(fileSize*BLOCK_SIZE));
+      long startTime = System.currentTimeMillis();
+      fs.delete(file, true);
+      executionTime[DELETE] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[DELETE]++;
+    }
+    
+    /** The list operation randomly picks a directory in the test space and
+     * list the directory content.
+     */
+    private void list() throws IOException {
+      String dirName = dirs.get(r.nextInt(dirs.size()));
+      long startTime = System.currentTimeMillis();
+      fs.listStatus(new Path(dirName));
+      executionTime[LIST] += (System.currentTimeMillis()-startTime);
+      totalNumOfOps[LIST]++;
+    }
+  }
+  
+  /** Main function:
+   * It first initializes data by parsing the command line arguments.
+   * It then starts the number of DFSClient threads as specified by
+   * the user.
+   * It stops all the threads when the specified elapsed time is passed.
+   * Before exiting, it prints the average execution time for 
+   * each operation and operation throughput.
+   */
+  public int run(String[] args) throws Exception {
+    int exitCode = init(args);
+    if (exitCode != 0) {
+      return exitCode;
+    }
+    
+    barrier();
+    
+    DFSClientThread[] threads = new DFSClientThread[numOfThreads];
+    for (int i=0; i<numOfThreads; i++) {
+      threads[i] = new DFSClientThread(i); 
+      threads[i].start();
+    }
+    if (elapsedTime>0) {
+      Thread.sleep(elapsedTime*1000);
+      shouldRun = false;
+    } 
+    for (DFSClientThread thread : threads) {
+      thread.join();
+      for (int i=0; i<TOTAL_OP_TYPES; i++) {
+        executionTime[i] += thread.executionTime[i];
+        totalNumOfOps[i] += thread.totalNumOfOps[i];
+      }
+    }
+    long totalOps = 0;
+    for (int i=0; i<TOTAL_OP_TYPES; i++) {
+      totalOps += totalNumOfOps[i];
+    }
+    
+    if (totalNumOfOps[OPEN] != 0) {
+      System.out.println("Average open execution time: " + 
+          (double)executionTime[OPEN]/totalNumOfOps[OPEN] + "ms");
+    }
+    if (totalNumOfOps[LIST] != 0) {
+      System.out.println("Average list execution time: " + 
+          (double)executionTime[LIST]/totalNumOfOps[LIST] + "ms");
+    }
+    if (totalNumOfOps[DELETE] != 0) {
+      System.out.println("Average deletion execution time: " + 
+          (double)executionTime[DELETE]/totalNumOfOps[DELETE] + "ms");
+      System.out.println("Average create execution time: " + 
+          (double)executionTime[CREATE]/totalNumOfOps[CREATE] + "ms");
+      System.out.println("Average write_close execution time: " + 
+          (double)executionTime[WRITE_CLOSE]/totalNumOfOps[WRITE_CLOSE] + "ms");
+    }
+    if (elapsedTime != 0) { 
+      System.out.println("Average operations per second: " + 
+          (double)totalOps/elapsedTime +"ops/s");
+    }
+    System.out.println();
+    return exitCode;
+  }
+
+  /** Parse the command line arguments and initialize the data */
+  private int init(String[] args) throws IOException {
+    try {
+      fs = FileSystem.get(getConf());
+    } catch (IOException ioe) {
+      System.err.println("Can not initialize the file system: " + 
+          ioe.getLocalizedMessage());
+      return -1;
+    }
+    int hostHashCode = hostname.hashCode();
+    try {
+      for (int i = 0; i < args.length; i++) { // parse command line
+        if (args[i].equals("-readProbability")) {
+          readPr = Double.parseDouble(args[++i]);
+          if (readPr<0 || readPr>1) {
+            System.err.println( 
+                "The read probability must be [0, 1]: " + readPr);
+            return -1;
+          }
+        } else if (args[i].equals("-writeProbability")) {
+          writePr = Double.parseDouble(args[++i]);
+          if (writePr<0 || writePr>1) {
+            System.err.println( 
+                "The write probability must be [0, 1]: " + writePr);
+            return -1;
+          }
+        } else if (args[i].equals("-root")) {
+          root = new Path(args[++i]);
+        } else if (args[i].equals("-maxDelayBetweenOps")) {
+          maxDelayBetweenOps = Integer.parseInt(args[++i]); // in milliseconds
+        } else if (args[i].equals("-numOfThreads")) {
+          numOfThreads = Integer.parseInt(args[++i]);
+          if (numOfThreads <= 0) {
+            System.err.println(
+                "Number of threads must be positive: " + numOfThreads);
+            return -1;
+          }
+        } else if (args[i].equals("-startTime")) {
+          startTime = Long.parseLong(args[++i]);
+        } else if (args[i].equals("-elapsedTime")) {
+          elapsedTime = Long.parseLong(args[++i]);
+        } else if (args[i].equals("-seed")) {
+          r = new Random(Long.parseLong(args[++i])+hostHashCode);
+        } else {
+          System.err.println(USAGE);
+          ToolRunner.printGenericCommandUsage(System.err);
+          return -1;
+        }
+      }
+    } catch (NumberFormatException e) {
+      System.err.println("Illegal parameter: " + e.getLocalizedMessage());
+      System.err.println(USAGE);
+      return -1;
+    }
+
+    if (readPr+writePr <0 || readPr+writePr>1) {
+      System.err.println(
+          "The sum of read probability and write probability must be [0, 1]: " +
+          readPr + " "+writePr);
+      return -1;
+    }
+    
+    if (r==null) {
+      r = new Random(System.currentTimeMillis()+hostHashCode);
+    }
+    
+    return initFileDirTables();
+  }
+  
+  /** Create a table that contains all directories under root and
+   * another table that contains all files under root.
+   */
+  private int initFileDirTables() {
+    try {
+      initFileDirTables(root);
+    } catch (IOException e) {
+      System.err.println(e.getLocalizedMessage());
+      e.printStackTrace();
+      return -1;
+    }
+    if (dirs.isEmpty()) {
+      System.err.println("The test space " + root + " is empty");
+      return -1;
+    }
+    if (files.isEmpty()) {
+      System.err.println("The test space " + root + 
+          " does not have any file");
+      return -1;
+    }
+    return 0;
+  }
+  
+  /** Create a table that contains all directories under the specified path and
+   * another table that contains all files under the specified path and
+   * whose name starts with "_file_".
+   */
+  private void initFileDirTables(Path path) throws IOException {
+    FileStatus[] stats = fs.listStatus(path);
+    if (stats != null) { 
+      for (FileStatus stat : stats) {
+        if (stat.isDir()) {
+          dirs.add(stat.getPath().toString());
+          initFileDirTables(stat.getPath());
+        } else {
+          Path filePath = stat.getPath();
+          if (filePath.getName().startsWith(StructureGenerator.FILE_NAME_PREFIX)) {
+            files.add(filePath.toString());
+          }
+        }
+      }
+    }
+  }
+  
+  /** Returns when the current time in milliseconds from the epoch equals
+   * the command line argument given by <code>-startTime</code>.
+   * This allows multiple instances of this program, running on clock
+   * synchronized nodes, to start at roughly the same time.
+   */
+  private void barrier() {
+    long sleepTime;
+    while ((sleepTime = startTime - System.currentTimeMillis()) > 0) {
+      try {
+        Thread.sleep(sleepTime);
+      } catch (InterruptedException ex) {
+      }
+    }
+  }
+
+  /** Create a file with a length of <code>fileSize</code>.
+   * The file is filled with 'a'.
+   */
+  private void genFile(Path file, long fileSize) throws IOException {
+    long startTime = System.currentTimeMillis();
+    FSDataOutputStream out = fs.create(file, true, 
+        getConf().getInt("io.file.buffer.size", 4096),
+        (short)getConf().getInt("dfs.replication", 3),
+        fs.getDefaultBlockSize());
+    executionTime[CREATE] += (System.currentTimeMillis()-startTime);
+    totalNumOfOps[CREATE]++;
+
+    for (long i=0; i<fileSize; i++) {
+      out.writeByte('a');
+    }
+    startTime = System.currentTimeMillis();
+    out.close();
+    executionTime[WRITE_CLOSE] += (System.currentTimeMillis()-startTime);
+    totalNumOfOps[WRITE_CLOSE]++;
+  }
+  
+  /** Main program
+   * 
+   * @param args command line arguments
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(),
+        new LoadGenerator(), args);
+    System.exit(res);
+  }
+
+}

+ 307 - 0
src/test/org/apache/hadoop/fs/loadGenerator/StructureGenerator.java

@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program generates a random namespace structure with the following
+ * constraints:
+ * 1. The number of subdirectories is a random number in [minWidth, maxWidth].
+ * 2. The maximum depth of each subdirectory is a random number 
+ *    [2*maxDepth/3, maxDepth].
+ * 3. Files are randomly placed in the empty directories. The size of each
+ *    file follows Gaussian distribution.
+ * The generated namespace structure is described by two files in the output
+ * directory. Each line of the first file 
+ * contains the full name of a leaf directory.  
+ * Each line of the second file contains
+ * the full name of a file and its size, separated by a blank.
+ * 
+ * The synopsis of the command is
+ * java StructureGenerator
+    -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5.
+    -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1
+    -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5
+    -numOfFiles <#OfFiles> : the total number of files; default is 10.
    -avgFileSize <avgFileSizeInBlocks>: average file size in blocks; default is 1.
+    -outDir <outDir>: output directory; default is the current directory.
+    -seed <seed>: random number generator seed; default is the current time.
+ */
+public class StructureGenerator {
+  private int maxDepth = 5;
+  private int minWidth = 1;
+  private int maxWidth = 5;
+  private int numOfFiles = 10;
+  private double avgFileSize = 1;
+  private File outDir = DEFAULT_STRUCTURE_DIRECTORY;
+  final static private String USAGE = "java StructureGenerator\n" +
+  	"-maxDepth <maxDepth>\n" +
+    "-minWidth <minWidth>\n" +
+    "-maxWidth <maxWidth>\n" +
+    "-numOfFiles <#OfFiles>\n" +
+    "-avgFileSize <avgFileSizeInBlocks>\n" +
+    "-outDir <outDir>\n" +
+    "-seed <seed>";
+  
+  private Random r = null; 
+  
+  /** Default directory for storing file/directory structure */
+  final static File DEFAULT_STRUCTURE_DIRECTORY = new File(".");
+  /** The name of the file for storing directory structure */
+  final static String DIR_STRUCTURE_FILE_NAME = "dirStructure";
+  /** The name of the file for storing file structure */
+  final static String FILE_STRUCTURE_FILE_NAME = "fileStructure";
+  /** The name prefix for the files created by this program */
+  final static String FILE_NAME_PREFIX = "_file_";
+  
+  /**
+   * The main function first parses the command line arguments,
+   * then generates in-memory directory structure and outputs to a file,
+   * last generates in-memory files and outputs them to a file.
+   */
+  public int run(String[] args) throws Exception {
+    int exitCode = 0;
+    exitCode = init(args);
+    if (exitCode != 0) {
+      return exitCode;
+    }
+    genDirStructure();
+    output(new File(outDir, DIR_STRUCTURE_FILE_NAME));
+    genFileStructure();
+    outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME));
+    return exitCode;
+  }
+
+  /** Parse the command line arguments and initialize the data */
+  private int init(String[] args) {
+    try {
+      for (int i = 0; i < args.length; i++) { // parse command line
+        if (args[i].equals("-maxDepth")) {
+          maxDepth = Integer.parseInt(args[++i]);
+          if (maxDepth<1) {
+            System.err.println("maxDepth must be positive: " + maxDepth);
+            return -1;
+          }
+        } else if (args[i].equals("-minWidth")) {
+          minWidth = Integer.parseInt(args[++i]);
+          if (minWidth<0) {
+            System.err.println("minWidth must be positive: " + minWidth);
+            return -1;
+          }
+        } else if (args[i].equals("-maxWidth")) {
+          maxWidth = Integer.parseInt(args[++i]);
+        } else if (args[i].equals("-numOfFiles")) {
+          numOfFiles = Integer.parseInt(args[++i]);
+          if (numOfFiles<1) {
+            System.err.println("NumOfFiles must be positive: " + numOfFiles);
+            return -1;
+          }
+        } else if (args[i].equals("-avgFileSize")) {
+          avgFileSize = Double.parseDouble(args[++i]);
+          if (avgFileSize<=0) {
+            System.err.println("AvgFileSize must be positive: " + avgFileSize);
+            return -1;
+          }
+        } else if (args[i].equals("-outDir")) {
+          outDir = new File(args[++i]);
+        } else if (args[i].equals("-seed")) {
+          r = new Random(Long.parseLong(args[++i]));
+        } else {
+          System.err.println(USAGE);
+          ToolRunner.printGenericCommandUsage(System.err);
+          return -1;
+        }
+      }
+    } catch (NumberFormatException e) {
+      System.err.println("Illegal parameter: " + e.getLocalizedMessage());
+      System.err.println(USAGE);
+      return -1;
+    }
+    
+    if (maxWidth < minWidth) {
+      System.err.println(
+          "maxWidth must be bigger than minWidth: " + maxWidth);
+      return -1;
+    }
+    
+    if (r==null) {
+      r = new Random();
+    }
+    return 0;
+  }
+  
+  /** In memory representation of a directory */
+  private static class INode {
+    private String name;
+    private List<INode> children = new ArrayList<INode>();
+    
+    /** Constructor */
+    private INode(String name) {
+      this.name = name;
+    }
+    
+    /** Add a child (subdir/file) */
+    private void addChild(INode child) {
+      children.add(child);
+    }
+    
+    /** Output the subtree rooted at the current node. 
+     * Only the leaves are printed.
+     */
+    private void output(PrintStream out, String prefix) {
+      prefix = prefix==null?name:prefix+"/"+name;
+      if (children.isEmpty()) {
+        out.println(prefix);
+      } else {
+        for (INode child : children) {
+          child.output(out, prefix);
+        }
+      }
+    }
+    
+    /** Output the files in the subtree rooted at this node */
+    protected void outputFiles(PrintStream out, String prefix) {
+      prefix = prefix==null?name:prefix+"/"+name;
+      for (INode child : children) {
+        child.outputFiles(out, prefix);
+      }
+    }
+    
+    /** Add all the leaves in the subtree to the input list */
+    private void getLeaves(List<INode> leaves) {
+      if (children.isEmpty()) {
+        leaves.add(this);
+      } else {
+        for (INode child : children) {
+          child.getLeaves(leaves);
+        }
+      }
+    }
+  }
+  
+  /** In memory representation of a file */
+  private static class FileINode extends INode {
+    private double numOfBlocks;
+
+    /** constructor */
+    private FileINode(String name, double numOfBlocks) {
+      super(name);
+      this.numOfBlocks = numOfBlocks;
+    }
+    
+    /** Output a file attribute */
+    protected void outputFiles(PrintStream out, String prefix) {
+      prefix = (prefix == null)?super.name: prefix + "/"+super.name;
+      out.println(prefix + " " + numOfBlocks);
+    }
+  }
+
+  private INode root;
+  
+  /** Generates a directory tree with a max depth of <code>maxDepth</code> */
+  private void genDirStructure() {
+    root = genDirStructure("", maxDepth);
+  }
+  
+  /** Generate a directory tree rooted at <code>rootName</code>
+   * The number of subtree is in the range of [minWidth, maxWidth].
+   * The maximum depth of each subtree is in the range of
+   * [2*maxDepth/3, maxDepth].
+   */
+  private INode genDirStructure(String rootName, int maxDepth) {
+    INode root = new INode(rootName);
+    
+    if (maxDepth>0) {
+      maxDepth--;
+      int minDepth = maxDepth*2/3;
+      // Figure out the number of subdirectories to generate
+      int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1);
+      // Expand the tree
+      for (int i=0; i<numOfSubDirs; i++) {
+        int childDepth = (maxDepth == 0)?0:
+          (r.nextInt(maxDepth-minDepth+1)+minDepth);
+        INode child = genDirStructure("dir"+i, childDepth);
+        root.addChild(child);
+      }
+    }
+    return root;
+  }
+  
+  /** Collects leaf nodes in the tree */
+  private List<INode> getLeaves() {
+    List<INode> leaveDirs = new ArrayList<INode>();
+    root.getLeaves(leaveDirs);
+    return leaveDirs;
+  }
+  
+  /** Decides where to place all the files and their lengths.
+   * It first collects all empty directories in the tree.
+   * For each file, it randomly chooses an empty directory to place the file.
+   * The file's length is generated using Gaussian distribution.
+   */
+  private void genFileStructure() {
+    List<INode> leaves = getLeaves();
+    int totalLeaves = leaves.size();
+    for (int i=0; i<numOfFiles; i++) {
+      int leaveNum = r.nextInt(totalLeaves);
+      double fileSize;
+      do {
+        fileSize = r.nextGaussian()+avgFileSize;
+      } while (fileSize<0);
+      leaves.get(leaveNum).addChild(
+          new FileINode(FILE_NAME_PREFIX+i, fileSize));
+    }
+  }
+  
+  /** Output directory structure to a file, each line of the file
+   * contains the directory name. Only empty directory names are printed. */
+  private void output(File outFile) throws FileNotFoundException {
+    System.out.println("Printing to " + outFile.toString());
+    PrintStream out = new PrintStream(outFile);
+    root.output(out, null);
+    out.close();
+  }
+  
+  /** Output all files' attributes to a file, each line of the output file
+   * contains a file name and its length. */
+  private void outputFiles(File outFile) throws FileNotFoundException {
+    System.out.println("Printing to " + outFile.toString());
+    PrintStream out = new PrintStream(outFile);
+    root.outputFiles(out, null);
+    out.close();
+  }
+  
+  /**
+   * Main program
+   * @param args Command line arguments
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    StructureGenerator sg = new StructureGenerator();
+    System.exit(sg.run(args));
+  }
+}

+ 217 - 0
src/test/org/apache/hadoop/fs/loadGenerator/TestLoadGenerator.java

@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.loadGenerator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+
+import junit.framework.TestCase;
+/**
+ * Tests the synthetic load generation tools: verifies that
+ * {@link StructureGenerator} writes the expected namespace description
+ * files for a fixed seed, and that {@link DataGenerator} and
+ * {@link LoadGenerator} run against a mini DFS cluster and reject
+ * invalid command-line arguments.
+ */
+public class TestLoadGenerator extends TestCase {
+  private static final Configuration CONF = new Configuration();
+  private static final int DEFAULT_BLOCK_SIZE = 10;
+  // Scratch directory where the generated structure files are written
+  private static final String OUT_DIR = 
+    System.getProperty("test.build.data","build/test/data");
+  private static final File DIR_STRUCTURE_FILE = 
+    new File(OUT_DIR, StructureGenerator.DIR_STRUCTURE_FILE_NAME);
+  private static final File FILE_STRUCTURE_FILE =
+    new File(OUT_DIR, StructureGenerator.FILE_STRUCTURE_FILE_NAME);
+  // Expected generator output for seed 1 with the arguments used below
+  private static final String DIR_STRUCTURE_FIRST_LINE = "/dir0";
+  private static final String DIR_STRUCTURE_SECOND_LINE = "/dir1";
+  private static final String FILE_STRUCTURE_FIRST_LINE =
+    "/dir0/_file_0 0.3754598635933768";
+  private static final String FILE_STRUCTURE_SECOND_LINE =
+    "/dir1/_file_1 1.4729310851145203";
+  
+
+  static {
+    CONF.setLong("dfs.block.size", DEFAULT_BLOCK_SIZE);
+    CONF.setInt("io.bytes.per.checksum", DEFAULT_BLOCK_SIZE);
+    CONF.setLong("dfs.heartbeat.interval", 1L);
+  }
+
+  /** Test if the structure generator works fine */ 
+  public void testStructureGenerator() throws Exception {
+    StructureGenerator sg = new StructureGenerator();
+    String[] args = new String[]{"-maxDepth", "2", "-minWidth", "1",
+        "-maxWidth", "2", "-numOfFiles", "2",
+        "-avgFileSize", "1", "-outDir", OUT_DIR, "-seed", "1"};
+    
+    // Indices into args[] of each option's VALUE, used to patch in
+    // invalid values below and restore the original afterwards.
+    final int MAX_DEPTH = 1;
+    final int MIN_WIDTH = 3;
+    final int MAX_WIDTH = 5;
+    final int NUM_OF_FILES = 7;
+    final int AVG_FILE_SIZE = 9;
+    final int SEED = 13;
+    try {
+      // successful case: seed 1 must reproduce the exact expected files
+      assertEquals(0, sg.run(args));
+      BufferedReader in = new BufferedReader(new FileReader(DIR_STRUCTURE_FILE));
+      assertEquals(DIR_STRUCTURE_FIRST_LINE, in.readLine());
+      assertEquals(DIR_STRUCTURE_SECOND_LINE, in.readLine());
+      assertEquals(null, in.readLine());
+      in.close();
+      
+      in = new BufferedReader(new FileReader(FILE_STRUCTURE_FILE));
+      assertEquals(FILE_STRUCTURE_FIRST_LINE, in.readLine());
+      assertEquals(FILE_STRUCTURE_SECOND_LINE, in.readLine());
+      assertEquals(null, in.readLine());
+      in.close();
+
+      // each invalid value must make run() fail with -1;
+      // restore the original value before the next case
+      String oldArg = args[MAX_DEPTH];
+      args[MAX_DEPTH] = "0";
+      assertEquals(-1, sg.run(args));
+      args[MAX_DEPTH] = oldArg;
+      
+      oldArg = args[MIN_WIDTH];
+      args[MIN_WIDTH] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[MIN_WIDTH] = oldArg;
+      
+      oldArg = args[MAX_WIDTH];
+      args[MAX_WIDTH] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[MAX_WIDTH] = oldArg;
+      
+      oldArg = args[NUM_OF_FILES];
+      args[NUM_OF_FILES] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[NUM_OF_FILES] = oldArg;
+      
+      oldArg = args[AVG_FILE_SIZE];
+      args[AVG_FILE_SIZE] = "-1";
+      assertEquals(-1, sg.run(args));
+      args[AVG_FILE_SIZE] = oldArg;
+      
+      oldArg = args[SEED];
+      args[SEED] = "34.d4";
+      assertEquals(-1, sg.run(args));
+      args[SEED] = oldArg;
+    } finally {
+      DIR_STRUCTURE_FILE.delete();
+      FILE_STRUCTURE_FILE.delete();
+    }
+  }
+
+  /** Test if the load generator works fine */
+  public void testLoadGenerator() throws Exception {
+    final String TEST_SPACE_ROOT = "/test";
+
+    // Hand-write the structure files that DataGenerator consumes
+    FileWriter writer = new FileWriter(DIR_STRUCTURE_FILE);
+    writer.write(DIR_STRUCTURE_FIRST_LINE+"\n");
+    writer.write(DIR_STRUCTURE_SECOND_LINE+"\n");
+    writer.close();
+    
+    writer = new FileWriter(FILE_STRUCTURE_FILE);
+    writer.write(FILE_STRUCTURE_FIRST_LINE+"\n");
+    writer.write(FILE_STRUCTURE_SECOND_LINE+"\n");
+    writer.close();
+    
+    MiniDFSCluster cluster = new MiniDFSCluster(CONF, 3, true, null);
+    cluster.waitActive();
+    
+    try {
+      DataGenerator dg = new DataGenerator();
+      dg.setConf(CONF);
+      String [] args = new String[] {"-inDir", OUT_DIR, "-root", TEST_SPACE_ROOT};
+      assertEquals(0, dg.run(args));
+
+      // Indices into args[] of each option's VALUE, used to patch in
+      // invalid values below and restore the original afterwards.
+      final int READ_PROBABILITY = 1;
+      final int WRITE_PROBABILITY = 3;
+      final int MAX_DELAY_BETWEEN_OPS = 7;
+      final int NUM_OF_THREADS = 9;
+      final int START_TIME = 11;
+      final int ELAPSED_TIME = 13;
+      
+      LoadGenerator lg = new LoadGenerator();
+      lg.setConf(CONF);
+      args = new String[] {"-readProbability", "0.3", "-writeProbability", "0.3",
+          "-root", TEST_SPACE_ROOT, "-maxDelayBetweenOps", "0",
+          "-numOfThreads", "1", "-startTime", 
+          Long.toString(System.currentTimeMillis()), "-elapsedTime", "10"};
+      
+      assertEquals(0, lg.run(args));
+
+      // probabilities outside [0,1] must be rejected
+      String oldArg = args[READ_PROBABILITY];
+      args[READ_PROBABILITY] = "1.1";
+      assertEquals(-1, lg.run(args));
+      args[READ_PROBABILITY] = "-1.1";
+      assertEquals(-1, lg.run(args));
+      args[READ_PROBABILITY] = oldArg;
+
+      // write probability: out of range, and read+write > 1
+      oldArg = args[WRITE_PROBABILITY];
+      args[WRITE_PROBABILITY] = "1.1";
+      assertEquals(-1, lg.run(args));
+      args[WRITE_PROBABILITY] = "-1.1";
+      assertEquals(-1, lg.run(args));
+      args[WRITE_PROBABILITY] = "0.9";
+      assertEquals(-1, lg.run(args));
+      args[WRITE_PROBABILITY] = oldArg;
+
+      oldArg = args[MAX_DELAY_BETWEEN_OPS];
+      args[MAX_DELAY_BETWEEN_OPS] = "1.x1";
+      assertEquals(-1, lg.run(args));
+      args[MAX_DELAY_BETWEEN_OPS] = oldArg;
+      
+      oldArg = args[NUM_OF_THREADS];
+      args[NUM_OF_THREADS] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[NUM_OF_THREADS] = oldArg;
+      
+      oldArg = args[START_TIME];
+      args[START_TIME] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[START_TIME] = oldArg;
+
+      oldArg = args[ELAPSED_TIME];
+      args[ELAPSED_TIME] = "-1";
+      assertEquals(-1, lg.run(args));
+      args[ELAPSED_TIME] = oldArg;
+    } finally {
+      cluster.shutdown();
+      DIR_STRUCTURE_FILE.delete();
+      FILE_STRUCTURE_FILE.delete();
+    }
+  }
+  
+  /**
+   * Runs both tests directly, without a JUnit harness.
+   * @param args unused
+   */
+  public static void main(String[] args) throws Exception {
+    TestLoadGenerator loadGeneratorTest = new TestLoadGenerator();
+    loadGeneratorTest.testStructureGenerator();
+    loadGeneratorTest.testLoadGenerator();
+  }
+}

部分文件因为文件数量过多而无法显示